In [2]:
import numpy as np
import os
import tensorflow as tf

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger, TensorBoard
from keras.layers import Input, Dense, Flatten, Convolution2D, BatchNormalization, Activation, Add
from keras.models import Model
from keras.utils import to_categorical
from keras import backend as K
from pommerman.agents import BaseAgent, SimpleAgent
from pommerman.configs import ffa_competition_env
from pommerman.constants import BOARD_SIZE
from pommerman.envs.v0 import Pomme
from tqdm import tqdm_notebook as tqdm
from sklearn.utils import class_weight

In [3]:
batch_size = 1024
epochs = 150
early_stopping = 7

log_path = './dagger/logs/il_go_5res_block/'
model_path = './dagger/model/il_go_5res_block/model.h4'
train_data_path = './dagger/train_data5/'
train_data_obs = 'obs.npy'
train_data_preprocessed = 'obs_map.npy'
train_data_labels = 'labels.npy'

In [4]:
class Agent:
    def __init__(self, actions, seed=0, save_path="./dagger/model/model.h4", 
                 log_path='./dagger/logs/', save_best_only=True):
        K.clear_session()
        self.log_path = log_path
        self.save_path = save_path
        self.actions = actions
        self.save_best_only = save_best_only
        self.rewards = []
        self.current_epoch = 0        
        
        self.model = self.create_model(actions)
        if not os.path.isdir(os.path.dirname(save_path)):
            os.makedirs(os.path.dirname(save_path))            
        if os.path.isfile(self.save_path):
            try:
                print("Trying to load model")
                self.model.load_weights(self.save_path)
                print("Model was loaded successful")
            except:
                print("Model load failed")
    
    def get_res_block(self, input):
        # Res block 1        
        x = Convolution2D(256, 3, padding='same')(input)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Convolution2D(256, 3, padding='same')(x)
        x = BatchNormalization()(x)
        x = Add()([input, x])
        x = Activation('relu')(x)
        return x
    
    def create_model(self, actions, input_shape=(11, 11, 17,)):
        inp = Input(input_shape)
        x = Convolution2D(256, 3, padding='same')(inp)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        
        # Ten residual blocks
        for i in range(5):
            x = self.get_res_block(x)
        
        # Output block
        # Should be 2 filters
        x = Convolution2D(4, 1, padding='same')(x)
        x = BatchNormalization()(x)   
        x = Activation('relu')(x)
        x = Flatten()(x)
        out = Dense(actions, activation='softmax')(x)
        model = Model(inputs = inp, outputs=out)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def train(self, obs, labels, batch_size=16384, epochs=100, early_stopping = 10, class_weight=None, initial_epoch=0):
        early_stopping = EarlyStopping(monitor='loss', patience=early_stopping)
        checkpoint = ModelCheckpoint(self.save_path, monitor='loss', save_best_only=self.save_best_only)
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=3, factor=0.8)
        logger = CSVLogger(self.log_path + 'log.csv', append=True)
        tensorboard = TensorBoard(self.log_path, batch_size=batch_size)
        
        history = self.model.fit(x=obs, y=labels, batch_size=batch_size, epochs=epochs, verbose=1,
                       callbacks=[early_stopping, checkpoint, reduce_lr, logger, tensorboard],
                       validation_split=0.15, shuffle=True, class_weight=class_weight, initial_epoch=initial_epoch)
        self.model.load_weights(self.save_path)
        self.current_epoch += len(history.history['lr'])

    @staticmethod
    def featurize(obs):
        shape = (BOARD_SIZE, BOARD_SIZE, 1)

        def get_matrix(dict, key):
            res = dict[key]
            return res.reshape(shape).astype(np.float32)

        def get_map(board, item):
            map = np.zeros(shape)
            map[board == item] = 1
            return map

        board = get_matrix(obs, 'board')

        # TODO: probably not needed Passage = 0
        rigid_map = get_map(board, 1)               # Rigid = 1
        wood_map = get_map(board, 2)                # Wood = 2
        bomb_map = get_map(board, 3)                # Bomb = 3
        flames_map = get_map(board, 4)              # Flames = 4
        fog_map = get_map(board, 5)                 # TODO: not used for first two stages Fog = 5
        extra_bomb_map = get_map(board, 6)          # ExtraBomb = 6
        incr_range_map = get_map(board, 7)          # IncrRange = 7
        kick_map = get_map(board, 8)                # Kick = 8
        skull_map = get_map(board, 9)               # Skull = 9

        position = obs["position"]
        my_position = np.zeros(shape)
        my_position[position[0], position[1], 0] = 1

        team_mates = get_map(board, obs["teammate"].value) # TODO during documentation it should be an array

        enemies = np.zeros(shape)
        for enemy in obs["enemies"]:
            enemies[board == enemy.value] = 1

        bomb_blast_strength = get_matrix(obs, 'bomb_blast_strength')
        bomb_life = get_matrix(obs, 'bomb_life')

        ammo = np.full((BOARD_SIZE, BOARD_SIZE, 1), obs["ammo"])
        blast_strength = np.full((BOARD_SIZE, BOARD_SIZE, 1), obs["blast_strength"])
        can_kick = np.full((BOARD_SIZE, BOARD_SIZE, 1), int(obs["can_kick"]))

        obs = np.concatenate([my_position, enemies, team_mates, rigid_map,
                              wood_map, bomb_map, flames_map,
                              fog_map, extra_bomb_map, incr_range_map,
                              kick_map, skull_map, bomb_blast_strength,
                              bomb_life, ammo, blast_strength, can_kick], axis=2)
        return obs    

In [5]:
if os.path.isdir(train_data_path):
    if os.path.isfile(train_data_path + train_data_preprocessed):
        full_obs = np.load(train_data_path + train_data_preprocessed)
    else:
        full_obs = np.load(train_data_path + train_data_obs)
        temp = []
        for obs in tqdm(full_obs):
            temp.append(Agent.featurize(obs))
        full_obs = np.array(temp)
    full_labels = np.load(train_data_path + train_data_labels)
else:
    # Generate training data
    training_data, _ = stimulator.stimulate(expert, num_rollouts=initial_rollouts)
    full_obs = training_data[0]
    full_labels = training_data[1]

In [6]:
np.sum(full_labels, axis=0) / np.sum(full_labels)

array([0.15266687, 0.20545281, 0.19221762, 0.20086664, 0.20515   ,
       0.04364607], dtype=float32)

In [7]:
# Shuffle data
indices = np.arange(full_obs.shape[0])
np.random.seed(0)
np.random.shuffle(indices)

full_labels = full_labels[indices]
full_obs = full_obs[indices]

In [8]:
# Instantiate the environment
config = ffa_competition_env()
env = Pomme(**config["env_kwargs"])
agent = Agent(env.action_space.n, save_path=model_path, log_path=log_path)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Trying to load model
Model was loaded successful


In [9]:
agent.model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 11, 11, 17)   0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 11, 11, 256)  39424       input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 11, 11, 256)  1024        conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 11, 11, 256)  0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [10]:
cw = class_weight.compute_class_weight('balanced', np.unique(np.argmax(full_labels, axis=1)), np.argmax(full_labels, axis=1))
cw

array([1.09170166, 0.81121628, 0.86707277, 0.82973792, 0.81241372,
       3.81859471])

In [None]:
agent.train(full_obs, full_labels, batch_size=batch_size, epochs=epochs, 
            early_stopping=early_stopping, class_weight=cw, initial_epoch=0)

Train on 1125567 samples, validate on 198630 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150

In [None]:
print("finish")