In [None]:
import numpy as np
import os
import tensorflow as tf

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from keras.layers import Input, Dense, Flatten, Convolution2D
from keras.models import Model
from keras.utils import to_categorical
from pommerman.agents import BaseAgent, SimpleAgent
from pommerman.configs import ffa_v0_env
from pommerman.constants import BOARD_SIZE
from pommerman.envs.v0 import Pomme

In [2]:
# Training hyperparameters forwarded to the agents' `train(...)` calls further down.
batch_size = 16384
epochs = 400
# Used as the EarlyStopping `patience` (number of epochs without improvement).
early_stopping = 200

# Output locations for the first agent.
# NOTE(review): `log_path` has no trailing separator; any consumer that builds a
# file name from it must join path components rather than concatenate strings.
log_path = './dagger/logs/il_cnn2dense1'
# NOTE(review): the '.h4' extension looks like a typo for '.h5' - confirm before
# renaming, since existing checkpoints are looked up under this exact path.
model_path = './dagger/model/il_cnn2dense1/model.h4'
# Directory and file names of the recorded training observations and labels.
train_data_path = './dagger/train_data/'
train_data_obs = 'obs.npy'
train_data_labels = 'labels.npy'

In [3]:
class Logger(object):
    """Write scalar values to TensorBoard event files without building TF graph ops."""

    def __init__(self, log_dir):
        """Create a summary writer that logs to ``log_dir``.

        Parameters
        ----------
        log_dir : str
            Directory handed to ``tf.summary.FileWriter``.
        """
        self.writer = tf.summary.FileWriter(log_dir)

    def log_scalar(self, tag, value, step):
        """Log a single scalar value.

        Parameters
        ----------
        tag : str
            Name of the scalar.
        value : float
            Value to record.
        step : int
            Training iteration the value belongs to.
        """
        summary = tf.Summary(value=[tf.Summary.Value(tag=tag,
                                                     simple_value=value)])
        self.writer.add_summary(summary, step)
        # The writer is never closed by this notebook, so push the event to disk
        # right away; otherwise buffered scalars are lost if the kernel is
        # interrupted before the writer's periodic flush.
        self.writer.flush()

In [2]:
class Agent:
    """CNN classifier trained by imitation on featurized Pommerman observations.

    The model maps a feature tensor (default shape ``(13, 13, 17)``, the layout
    produced by ``featurize``) to a softmax over ``actions`` classes. Training
    metrics are mirrored to TensorBoard via ``Logger`` and to ``log.csv`` inside
    ``log_path``.
    """

    def __init__(self, actions, seed=0, save_path="./dagger/model/model.h4",
                 log_path='./dagger/logs/', save_best_only=True):
        """Build the model and restore weights from ``save_path`` if present.

        Parameters
        ----------
        actions : int
            Number of output classes of the network.
        seed : int
            Accepted for interface compatibility; not used by this class.
        save_path : str
            Checkpoint file written during training and loaded on start-up.
        log_path : str
            Directory for the TensorBoard events and the CSV training log.
        save_best_only : bool
            Forwarded to ``ModelCheckpoint``.
        """
        self.log_path = log_path
        self.save_path = save_path
        self.actions = actions
        self.save_best_only = save_best_only
        self.rewards = []
        self.current_epoch = 0
        self.logger = Logger(self.log_path)

        self.model = self.create_model(actions)

        # A bare file name has an empty dirname; os.makedirs('') would raise.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.isdir(save_dir):
            os.makedirs(save_dir)

        if os.path.isfile(self.save_path):
            try:
                print("Trying to load model")
                self.model.load_weights(self.save_path)
                print("Model was loaded successful")
            except Exception as err:
                # Best effort: continue with fresh weights, but say why loading
                # failed, and let KeyboardInterrupt/SystemExit propagate.
                print("Model load failed: {!r}".format(err))

    def create_model(self, actions, input_shape=(13, 13, 17,)):
        """Create and compile the network: 3 conv layers, one hidden dense layer, softmax.

        Parameters
        ----------
        actions : int
            Size of the softmax output layer.
        input_shape : tuple
            Shape of a single featurized observation.

        Returns
        -------
        A compiled Keras ``Model``.
        """
        inp = Input(input_shape)
        x = inp
        # NOTE(review): no activation is passed to the convolutions, so they use
        # the layer's default - confirm that stacking them without a
        # non-linearity is intended.
        for _ in range(3):
            x = Convolution2D(64, 3)(x)
        x = Flatten()(x)
        x = Dense(128, activation='relu')(x)
        out = Dense(actions, activation='softmax')(x)
        model = Model(inputs=inp, outputs=out)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def train(self, obs, labels, batch_size=16384, epochs=100, early_stopping = 10):
        """Fit the model, reload the checkpointed weights and log the metrics.

        Parameters
        ----------
        obs : array-like
            Featurized observations, passed to ``fit`` as ``x``.
        labels : array-like
            Targets, passed to ``fit`` as ``y``.
        batch_size : int
            Mini-batch size.
        epochs : int
            Maximum number of epochs for this call.
        early_stopping : int
            ``patience`` of the ``EarlyStopping`` callback.
        """
        # All callbacks monitor the *training* loss, although 20% of the data is
        # held out for validation.
        stopper = EarlyStopping(monitor='loss', patience=early_stopping)
        checkpoint = ModelCheckpoint(self.save_path, monitor='loss', save_best_only=self.save_best_only)
        # NOTE(review): factor=0.09 shrinks the learning rate roughly 11x per
        # plateau - confirm this is not a typo for 0.9.
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3, factor=0.09, epsilon=0.0001)
        # Join instead of concatenating so that a log_path without a trailing
        # separator still puts log.csv inside the log directory.
        csv_logger = CSVLogger(os.path.join(self.log_path, 'log.csv'), append=True)

        history = self.model.fit(x=obs, y=labels, batch_size=batch_size, epochs=epochs, verbose=1,
                                 callbacks=[stopper, checkpoint, reduce_lr, csv_logger],
                                 validation_split=0.2, shuffle=True)
        # Continue from whatever the checkpoint callback stored on disk.
        self.model.load_weights(self.save_path)
        self.log_history(history)
        # 'loss' is recorded for every epoch, so its length is the number of
        # epochs actually run; this does not depend on optional keys like 'lr'.
        self.current_epoch += len(history.history['loss'])

    def log_history(self, history):
        """Send every per-epoch metric in ``history.history`` to TensorBoard.

        Steps continue from ``self.current_epoch`` so that consecutive ``train``
        calls form one curve. All recorded metrics are logged under their own
        key, so the method does not break when a metric is named differently
        (e.g. ``accuracy`` instead of ``acc``).
        """
        for name in sorted(history.history):
            for offset, value in enumerate(history.history[name], start=1):
                self.add_log(name, value, self.current_epoch + offset)

    @staticmethod
    def featurize(obs):
        """Turn one raw observation into a ``(BOARD_SIZE, BOARD_SIZE, 17)`` array.

        ``obs`` is indexed with the keys ``board``, ``position``, ``teammate``,
        ``enemies``, ``bomb_blast_strength``, ``bomb_life``, ``ammo``,
        ``blast_strength`` and ``can_kick``.

        Channel order: own position, enemies, teammate, nine one-hot planes for
        board values 1..9, bomb blast strength, bomb life, then three constant
        planes holding ammo, blast strength and can_kick.
        """
        shape = (BOARD_SIZE, BOARD_SIZE, 1)

        def as_plane(key):
            # Board-sized array from the observation, as a float32 channel.
            return obs[key].reshape(shape).astype(np.float32)

        def one_hot_plane(item):
            # 1 where the board holds `item`, 0 elsewhere.
            plane = np.zeros(shape)
            plane[board == item] = 1
            return plane

        def constant_plane(value):
            # Broadcast a scalar attribute over the whole board.
            return np.full(shape, value)

        board = as_plane('board')

        # Board values as annotated by the original author: Rigid=1, Wood=2,
        # Bomb=3, Flames=4, Fog=5, ExtraBomb=6, IncrRange=7, Kick=8, Skull=9.
        # Passage (0) gets no plane. TODO: fog is not used for the first two stages.
        item_planes = [one_hot_plane(item) for item in range(1, 10)]

        row, col = obs["position"][0], obs["position"][1]
        my_position = np.zeros(shape)
        my_position[row, col, 0] = 1

        # TODO: according to the documentation "teammate" should be an array.
        team_mates = one_hot_plane(obs["teammate"].value)

        enemies = np.zeros(shape)
        for enemy in obs["enemies"]:
            enemies[board == enemy.value] = 1

        planes = [my_position, enemies, team_mates]
        planes.extend(item_planes)
        planes.extend([as_plane('bomb_blast_strength'),
                       as_plane('bomb_life'),
                       constant_plane(obs["ammo"]),
                       constant_plane(obs["blast_strength"]),
                       constant_plane(int(obs["can_kick"]))])
        return np.concatenate(planes, axis=2)

    def add_log(self, tag, value, step):
        """Forward one scalar to the TensorBoard logger."""
        self.logger.log_scalar(tag, value, step)

In [5]:
class Agent2:
    """Variant of the imitation CNN with two hidden dense layers.

    The model maps a feature tensor (default shape ``(13, 13, 17)``, the layout
    produced by ``featurize``) to a softmax over ``actions`` classes. Training
    metrics are mirrored to TensorBoard via ``Logger`` and to ``log.csv`` inside
    ``log_path``.
    """

    def __init__(self, actions, seed=0, save_path="./dagger/model/cnn3dense2/model.h4",
                 log_path='./dagger/logs/cnn3dense2/', save_best_only=True):
        """Build the model and restore weights from ``save_path`` if present.

        Parameters
        ----------
        actions : int
            Number of output classes of the network.
        seed : int
            Accepted for interface compatibility; not used by this class.
        save_path : str
            Checkpoint file written during training and loaded on start-up.
        log_path : str
            Directory for the TensorBoard events and the CSV training log.
        save_best_only : bool
            Forwarded to ``ModelCheckpoint``.
        """
        self.log_path = log_path
        self.save_path = save_path
        self.actions = actions
        self.save_best_only = save_best_only
        self.rewards = []
        self.current_epoch = 0
        self.logger = Logger(self.log_path)

        self.model = self.create_model(actions)

        # A bare file name has an empty dirname; os.makedirs('') would raise.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.isdir(save_dir):
            os.makedirs(save_dir)

        if os.path.isfile(self.save_path):
            try:
                print("Trying to load model")
                self.model.load_weights(self.save_path)
                print("Model was loaded successful")
            except Exception as err:
                # Best effort: continue with fresh weights, but say why loading
                # failed, and let KeyboardInterrupt/SystemExit propagate.
                print("Model load failed: {!r}".format(err))

    def create_model(self, actions, input_shape=(13, 13, 17,)):
        """Create and compile the network: 3 conv layers, two hidden dense layers, softmax.

        Parameters
        ----------
        actions : int
            Size of the softmax output layer.
        input_shape : tuple
            Shape of a single featurized observation.

        Returns
        -------
        A compiled Keras ``Model``.
        """
        inp = Input(input_shape)
        x = inp
        # NOTE(review): no activation is passed to the convolutions, so they use
        # the layer's default - confirm that stacking them without a
        # non-linearity is intended.
        for _ in range(3):
            x = Convolution2D(64, 3)(x)
        x = Flatten()(x)
        x = Dense(128, activation='relu')(x)
        x = Dense(128, activation='relu')(x)
        # The model is compiled with 'categorical_crossentropy', which expects
        # class probabilities; the output layer therefore needs a softmax rather
        # than raw, unbounded scores.
        out = Dense(actions, activation='softmax')(x)
        model = Model(inputs=inp, outputs=out)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def train(self, obs, labels, batch_size=16384, epochs=100, early_stopping = 10):
        """Fit the model, reload the checkpointed weights and log the metrics.

        Parameters
        ----------
        obs : array-like
            Featurized observations, passed to ``fit`` as ``x``.
        labels : array-like
            Targets, passed to ``fit`` as ``y``.
        batch_size : int
            Mini-batch size.
        epochs : int
            Maximum number of epochs for this call.
        early_stopping : int
            ``patience`` of the ``EarlyStopping`` callback.
        """
        # All callbacks monitor the *training* loss, although 20% of the data is
        # held out for validation.
        stopper = EarlyStopping(monitor='loss', patience=early_stopping)
        checkpoint = ModelCheckpoint(self.save_path, monitor='loss', save_best_only=self.save_best_only)
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3, factor=0.9, verbose=1)
        # Join instead of concatenating so that a log_path without a trailing
        # separator still puts log.csv inside the log directory.
        csv_logger = CSVLogger(os.path.join(self.log_path, 'log.csv'), append=True)

        history = self.model.fit(x=obs, y=labels, batch_size=batch_size, epochs=epochs, verbose=1,
                                 callbacks=[stopper, checkpoint, reduce_lr, csv_logger],
                                 validation_split=0.2, shuffle=True)
        # Continue from whatever the checkpoint callback stored on disk.
        self.model.load_weights(self.save_path)
        self.log_history(history)
        # 'loss' is recorded for every epoch, so its length is the number of
        # epochs actually run; this does not depend on optional keys like 'lr'.
        self.current_epoch += len(history.history['loss'])

    def log_history(self, history):
        """Send every per-epoch metric in ``history.history`` to TensorBoard.

        Steps continue from ``self.current_epoch`` so that consecutive ``train``
        calls form one curve. All recorded metrics are logged under their own
        key, so the method does not break when a metric is named differently
        (e.g. ``accuracy`` instead of ``acc``).
        """
        for name in sorted(history.history):
            for offset, value in enumerate(history.history[name], start=1):
                self.add_log(name, value, self.current_epoch + offset)

    @staticmethod
    def featurize(obs):
        """Turn one raw observation into a ``(BOARD_SIZE, BOARD_SIZE, 17)`` array.

        ``obs`` is indexed with the keys ``board``, ``position``, ``teammate``,
        ``enemies``, ``bomb_blast_strength``, ``bomb_life``, ``ammo``,
        ``blast_strength`` and ``can_kick``.

        Channel order: own position, enemies, teammate, nine one-hot planes for
        board values 1..9, bomb blast strength, bomb life, then three constant
        planes holding ammo, blast strength and can_kick.
        """
        shape = (BOARD_SIZE, BOARD_SIZE, 1)

        def as_plane(key):
            # Board-sized array from the observation, as a float32 channel.
            return obs[key].reshape(shape).astype(np.float32)

        def one_hot_plane(item):
            # 1 where the board holds `item`, 0 elsewhere.
            plane = np.zeros(shape)
            plane[board == item] = 1
            return plane

        def constant_plane(value):
            # Broadcast a scalar attribute over the whole board.
            return np.full(shape, value)

        board = as_plane('board')

        # Board values as annotated by the original author: Rigid=1, Wood=2,
        # Bomb=3, Flames=4, Fog=5, ExtraBomb=6, IncrRange=7, Kick=8, Skull=9.
        # Passage (0) gets no plane. TODO: fog is not used for the first two stages.
        item_planes = [one_hot_plane(item) for item in range(1, 10)]

        row, col = obs["position"][0], obs["position"][1]
        my_position = np.zeros(shape)
        my_position[row, col, 0] = 1

        # TODO: according to the documentation "teammate" should be an array.
        team_mates = one_hot_plane(obs["teammate"].value)

        enemies = np.zeros(shape)
        for enemy in obs["enemies"]:
            enemies[board == enemy.value] = 1

        planes = [my_position, enemies, team_mates]
        planes.extend(item_planes)
        planes.extend([as_plane('bomb_blast_strength'),
                       as_plane('bomb_life'),
                       constant_plane(obs["ammo"]),
                       constant_plane(obs["blast_strength"]),
                       constant_plane(int(obs["can_kick"]))])
        return np.concatenate(planes, axis=2)

    def add_log(self, tag, value, step):
        """Forward one scalar to the TensorBoard logger."""
        self.logger.log_scalar(tag, value, step)

In [6]:
# Load the recorded demonstrations when both arrays are on disk; otherwise fall
# back to generating them.
obs_file = os.path.join(train_data_path, train_data_obs)
labels_file = os.path.join(train_data_path, train_data_labels)
if os.path.isfile(obs_file) and os.path.isfile(labels_file):
    # Agent.featurize indexes each observation by string keys, so the file
    # presumably stores pickled dict objects; NumPy >= 1.16.3 refuses to load
    # those unless allow_pickle=True. Unpickling can execute arbitrary code:
    # only load files you generated yourself.
    full_obs = np.load(obs_file, allow_pickle=True)
    full_labels = np.load(labels_file)
else:
    # Generate training data.
    # NOTE(review): `stimulator`, `expert` and `initial_rollouts` are not defined
    # in any visible cell of this notebook; fail with an actionable message
    # instead of a bare NameError when they are missing.
    missing = [name for name in ('stimulator', 'expert', 'initial_rollouts')
               if name not in globals()]
    if missing:
        raise RuntimeError(
            "Training data not found at {!r} / {!r} and it cannot be generated "
            "because these names are undefined: {}".format(
                obs_file, labels_file, ', '.join(missing)))
    training_data, _ = stimulator.stimulate(expert, num_rollouts=initial_rollouts)
    full_obs = training_data[0]
    full_labels = training_data[1]
# Convert every raw observation into the feature tensor the network consumes.
full_obs = np.array([Agent.featurize(raw_obs) for raw_obs in full_obs])

In [7]:
# Instantiate the environment
config = ffa_v0_env()
env = Pomme(**config["env_kwargs"])

#agent = Agent(env.action_space.n, save_path=model_path, log_path=log_path)
agent = Agent(env.action_space.n, save_path=model_path, log_path=log_path)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [None]:
# Fit the first agent on the featurized observations with the notebook-level hyperparameters.
agent.train(full_obs, full_labels, batch_size=batch_size, epochs=epochs, early_stopping=early_stopping)

Train on 1005208 samples, validate on 251303 samples
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
 131072/1005208 [==>...........................] - ETA: 10:15 - loss: 1.1921e-07 - acc: 0.3651

In [None]:
# Second agent; no paths are passed, so Agent2's default checkpoint and log locations are used.
agent2 = Agent2(env.action_space.n)

In [None]:
# Fit the second agent on the same data and hyperparameters as the first one.
agent2.train(full_obs, full_labels, batch_size=batch_size, epochs=epochs, early_stopping=early_stopping)