In [1]:
import numpy as np
import os

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger, TensorBoard
from keras.layers import Input, Dense, Flatten, Convolution2D, BatchNormalization, Activation, Add
from keras.models import Model
from keras.utils import to_categorical
from keras import backend as K
from pommerman.constants import BOARD_SIZE
from tqdm import tqdm_notebook as tqdm
from sklearn.utils.class_weight import compute_class_weight

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


Import error! You will not be able to render --> Cannot connect to "None"


In [2]:
# --- Training hyper-parameters -------------------------------------------
action_space = 6       # number of discrete actions (width of the softmax head)
epochs = 150
batch_size = 2048
early_stopping = 5     # forwarded to Trainer.train as the EarlyStopping patience

# --- Output locations for this experiment --------------------------------
# NOTE(review): the '.h4' extension looks like a typo for '.h5'; left as is
# because existing checkpoints may already live at this path — confirm.
model_path = './supervised_learning/model/go_3res_block_tanh/model.h4'
log_path = './supervised_learning/logs/go_3res_block_tanh/'

# --- Input dataset files -------------------------------------------------
train_data_path = './dataset/'
train_data_obs_map = os.path.join(train_data_path, 'obs_map.npy')
train_data_labels = os.path.join(train_data_path, 'labels.npy')
train_data_reward = os.path.join(train_data_path, 'reward.npy')

# Make sure the dataset directory exists (no-op when it is already there).
os.makedirs(train_data_path, exist_ok=True)

In [3]:
class Trainer:
    """Build, restore and train a small residual CNN with two output heads.

    The network (see ``create_model``) has an ``actions`` softmax head and a
    scalar ``reward`` head with a tanh activation. On construction the model
    is created and, if a file already exists at ``save_path``, its weights are
    loaded on a best-effort basis.
    """

    def __init__(self, actions, save_path, log_path, save_best_only=True, seed=0):
        """Create the model and try to restore weights from ``save_path``.

        Args:
            actions: Number of units in the softmax ``actions`` head.
            save_path: File used both to restore weights and to write checkpoints.
            log_path: Directory receiving the CSV log and TensorBoard events.
            save_best_only: Forwarded to ``ModelCheckpoint`` in ``train``.
            seed: Currently unused; kept so existing callers keep working.
        """
        # Drop any graph/session state left over from a previous Trainer.
        K.clear_session()
        self.log_path = log_path
        self.save_path = save_path
        self.actions = actions
        self.save_best_only = save_best_only

        # Create model
        self.model = self.create_model(actions)

        # Make sure the checkpoint directory exists. A bare file name has an
        # empty dirname, which os.makedirs would reject, so skip it then.
        save_dir = os.path.dirname(save_path)
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)

        # Load model if exists (best effort: a failed load keeps fresh weights,
        # but the reason is reported instead of being silently swallowed).
        if os.path.isfile(self.save_path):
            try:
                print("Trying to load model")
                self.model.load_weights(self.save_path)
                print("Model was loaded successful")
            except Exception as error:
                print("Model load failed: {}".format(error))

    def get_res_block(self, input):
        """Append one residual block (conv-BN-ReLU-conv-BN + skip, then ReLU).

        Both convolutions use 256 filters, so ``input`` must already have 256
        channels for the ``Add`` skip connection to be valid.

        Args:
            input: Keras tensor the block is applied to.

        Returns:
            The output tensor of the block.
        """
        x = Convolution2D(256, 3, padding='same')(input)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Convolution2D(256, 3, padding='same')(x)
        x = BatchNormalization()(x)
        # Skip connection: add the block input back before the final activation.
        x = Add()([input, x])
        x = Activation('relu')(x)
        return x

    def create_model(self, actions, input_shape=(11, 11, 18,)):
        """Build and compile the two-headed network.

        Args:
            actions: Number of units in the softmax ``actions`` output.
            input_shape: Shape of a single observation (without batch axis).

        Returns:
            A compiled Keras ``Model`` with outputs ``[actions, reward]``,
            trained with categorical cross-entropy and MAE respectively.
        """
        inp = Input(input_shape)
        x = Convolution2D(256, 3, padding='same')(inp)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)

        # 3 residual blocks
        for _ in range(3):
            x = self.get_res_block(x)

        # Output block
        # Should be 2 filters
        x = Convolution2D(2, 1, padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Flatten()(x)

        # Both heads read the same flattened features.
        probs = Dense(actions, activation='softmax', name='actions')(x)
        reward = Dense(1, activation='tanh', name='reward')(x)

        model = Model(inputs=inp, outputs=[probs, reward])
        model.compile(optimizer='adam', loss=['categorical_crossentropy', 'mae'], metrics=['accuracy'])
        return model

    def train(self, obs, actions, rewards, batch_size=16384, epochs=100,
              early_stopping=10, class_weight=None, initial_epoch=0):
        """Fit the model on the given data and return the Keras history.

        All callbacks monitor the training ``loss`` (not ``val_loss``), even
        though 15% of the data is held out through ``validation_split``.

        Args:
            obs: Network inputs.
            actions: Targets for the ``actions`` head.
            rewards: Targets for the ``reward`` head.
            batch_size: Mini-batch size for ``fit`` (also given to TensorBoard).
            epochs: Upper bound on the number of epochs.
            early_stopping: Patience, in epochs, of the ``EarlyStopping`` callback.
            class_weight: Forwarded unchanged to ``Model.fit``.
            initial_epoch: Forwarded unchanged to ``Model.fit``.

        Returns:
            The ``History`` object returned by ``Model.fit``.
        """
        # CSVLogger opens its file directly, so the directory has to exist.
        if self.log_path:
            os.makedirs(self.log_path, exist_ok=True)

        stopper = EarlyStopping(monitor='loss', patience=early_stopping)
        checkpoint = ModelCheckpoint(self.save_path, monitor='loss', save_best_only=self.save_best_only)
        reduce_lr = ReduceLROnPlateau(monitor='loss', patience=2, factor=0.1)
        # os.path.join keeps the file inside log_path even without a trailing slash.
        logger = CSVLogger(os.path.join(self.log_path, 'log.csv'), append=True)
        tensorboard = TensorBoard(self.log_path, batch_size=batch_size)

        history = self.model.fit(x=obs, y=[actions, rewards], batch_size=batch_size, epochs=epochs, verbose=1,
                                 callbacks=[stopper, checkpoint, reduce_lr, logger, tensorboard],
                                 validation_split=0.15, shuffle=True, class_weight=class_weight,
                                 initial_epoch=initial_epoch)
        return history

In [4]:
# Read the three training arrays from disk (action labels, board
# observations, rewards), in that order.
labels, observations, rewards = map(
    np.load,
    (train_data_labels, train_data_obs_map, train_data_reward),
)

In [5]:
# One-hot encode the action labels for the softmax head.
# This cell overwrites `labels` in place, so it is guarded: re-running it on an
# array that already has `action_space` columns would encode it a second time.
if labels.ndim < 2 or labels.shape[-1] != action_space:
    labels = to_categorical(labels, num_classes=action_space)

In [6]:
# Display the shapes of the three arrays as a sanity check on the sample counts.
tuple(array.shape for array in (labels, observations, rewards))

((553065, 6), (553065, 11, 11, 18), (553065,))

In [7]:
# Share of each action column in the one-hot labels (column sums over the grand total).
labels.sum(axis=0) / labels.sum()

array([0.15941164, 0.20397964, 0.190945  , 0.20009764, 0.20339382,
       0.04217226], dtype=float32)

In [8]:
# Recover integer class ids from the one-hot labels once, instead of twice.
label_ids = np.argmax(labels, axis=1)

# 'balanced' weights so that rare actions contribute more to the loss.
# Keyword arguments are used because recent scikit-learn releases no longer
# accept `classes` and `y` positionally; the keywords work on old releases too.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(label_ids),
    y=label_ids,
)
class_weights

array([1.04551126, 0.81707501, 0.87285166, 0.83292671, 0.81942839,
       3.9520451 ])

In [9]:
# Build the network; weights are restored from `model_path` if a file exists there.
trainer = Trainer(
    actions=action_space,
    save_path=model_path,
    log_path=log_path,
)

In [10]:
# Print the layer-by-layer summary of the compiled two-headed model.
trainer.model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 11, 11, 18)   0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 11, 11, 256)  41728       input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 11, 11, 256)  1024        conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 11, 11, 256)  0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

In [None]:
# Keras documents `class_weight` as a dict mapping class index -> weight; for a
# multi-output model it can be keyed by output name. The raw ndarray / int list
# used before is not a documented form and appears to be ignored by Keras 2.x
# (NOTE(review): confirm against the installed version). Only the 'actions'
# head is re-weighted; the 'reward' regression head keeps uniform weights.
action_class_weight = {int(cls): float(w) for cls, w in enumerate(class_weights)}

trainer.train(observations, labels, rewards, batch_size=batch_size,
              epochs=epochs, early_stopping=early_stopping,
              class_weight={'actions': action_class_weight})

Train on 470105 samples, validate on 82960 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150

In [13]:
# Marker cell: its output shows that execution has reached this point.
print("finish")

finish


In [None]:
# Save the model in its current (last-epoch) state to `model_path`.
# NOTE(review): Trainer.train checkpoints to this same path with
# save_best_only=True, so this call overwrites the best checkpoint with the
# final weights — confirm that is intended.
trainer.model.save(model_path)