In [9]:
import tensorflow as tf
import keras
from keras.models import Model, load_model
from keras.layers import *
import keras.backend as K
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('src') # Fix for jupyter
import src.emulator as emulator
import src.emulator_utils as emulator_utils
import src.emulator_vis as emulator_vis
%matplotlib inline

# Main params

In [5]:
# Shape of a single network input (batch dimension excluded); handed to
# Input() when the model is built.
# NOTE(review): the trailing 4 is presumably the number of feature planes per
# map cell -- confirm against the emulator's map encoding.
INPUT_SIZE = (16, 16, 4) # Map size fixed to 16x16 (2 to 3 players)
# Number of units in the softmax policy output (one per action).
N_ACTIONS = 4

# Define the Layer Blocks

In [6]:
# Convolutional Block
def conv_block(in_layer, name, filters=128, kernel_size=(3,3), bn=True, relu=True):
    """Apply a 'same'-padded Conv2D, optionally followed by batch norm and ReLU.

    Args:
        in_layer: Tensor the convolution is applied to.
        name: Name given to the Conv2D layer. The optional layers are named
            ``name + '_bn'`` and ``name + '_relu'``.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        bn: When truthy, a BatchNormalization layer follows the convolution.
        relu: When truthy, a ReLU Activation layer is applied last.

    Returns:
        The output tensor of the last layer that was applied.
    """
    tensor = Conv2D(filters, kernel_size, padding='same', name=name)(in_layer)
    if bn:
        tensor = BatchNormalization(name=name + '_bn')(tensor)
    # ReLU, when requested, is always the final stage of the block.
    return Activation('relu', name=name + '_relu')(tensor) if relu else tensor

# Residual Block
def residual_conv(in_layer, idx, filters=128, kernel_size=(3,3), bn=True, relu=True, merge='concat'):
    """Two-convolution block whose output is merged with its own input.

    Layers are named with the prefix ``'res_' + str(idx)``.

    Args:
        in_layer: Input tensor; also the tensor fed to the skip connection.
        idx: Block index, used only to build unique layer names.
        filters: Number of filters of both convolutions.
        kernel_size: Kernel size of the *second* convolution. The first
            convolution is a fixed 3x3 conv + batch norm + ReLU block.
        bn: When truthy, batch-normalise the second convolution's output.
        relu: When truthy, apply a ReLU after the merge.
        merge: How the skip connection is combined with the conv branch.
            ``'concat'`` (default, the historical behaviour) concatenates
            ``[in_layer, branch]`` with Keras' default Concatenate axis, so
            with channels-last data the output has the input's channels plus
            ``filters`` and the width grows with every stacked block.
            ``'add'`` sums the two tensors element-wise (a classic residual
            connection); this keeps the width constant but requires
            ``in_layer`` to have the same shape as the conv branch.

    Returns:
        The merged (and optionally ReLU-activated) tensor.

    Raises:
        ValueError: If ``merge`` is neither ``'concat'`` nor ``'add'``.
    """
    # Validate up front so that no layers are created for a bad argument.
    if merge not in ('concat', 'add'):
        raise ValueError("merge must be 'concat' or 'add', got %r" % (merge,))

    name = 'res_' + str(idx)
    # Full conv block of pre-defined shape (deliberately ignores kernel_size/bn/relu)
    l = conv_block(in_layer, name + '_conv1', filters, kernel_size=(3,3), bn=True, relu=True)
    # Second block with skip connection
    l = Conv2D(filters, kernel_size, padding='same', name = name + '_conv2')(l)
    if bn:
        l = BatchNormalization(name = name + '_conv2_bn')(l)
    if merge == 'add':
        l = Add(name = name + '_add')([in_layer, l]) # Additive (true residual) skip conn.
    else:
        l = Concatenate()([in_layer, l]) # Skip conn. (channel-growing)
    if relu:
        l = Activation('relu', name = name + '_relu')(l)
    return l

def value_head(in_layer):
    """Build the value output branch on top of ``in_layer``.

    A 1x1 ``conv_block`` with a single filter is flattened, passed through a
    128-unit dense layer with ReLU, and reduced to one tanh-activated unit
    (so the output lies in [-1, 1]).

    Args:
        in_layer: Tensor the head is attached to.

    Returns:
        Output tensor of the final Dense layer, named ``'value'``.
    """
    head = conv_block(in_layer, 'value_head', filters=1, kernel_size=(1,1))
    stages = (
        Flatten(name='value_flatten'),
        Dense(128, name='value_dense'),
        Activation('relu', name='value_relu'),
        Dense(1, name='value', activation='tanh'),  # Value output
    )
    # Thread the tensor through each stage in order.
    for stage in stages:
        head = stage(head)
    return head

def policy_head(in_layer):
    """Build the policy output branch on top of ``in_layer``.

    A 1x1 ``conv_block`` with two filters is flattened and mapped by a dense
    softmax layer to ``N_ACTIONS`` outputs.

    Args:
        in_layer: Tensor the head is attached to.

    Returns:
        Output tensor of the softmax Dense layer, named ``'policy'``.
    """
    reduced = conv_block(in_layer, 'policy_head', filters=2, kernel_size=(1,1))
    flat = Flatten(name='policy_flatten')(reduced)
    # Policy output
    return Dense(N_ACTIONS, name='policy', activation='softmax')(flat)

# Model

In [7]:
# Number of residual_conv blocks stacked after the stem convolution.
n_residual = 16

# Stem: one default conv_block (128 filters, 3x3, batch norm, ReLU) named 'conv'.
input_layer = Input(INPUT_SIZE)
l = conv_block(input_layer, 'conv')
# Residual tower; blocks are numbered from 1, so layers are named res_1 ... res_16.
for i in range(n_residual):
    l = residual_conv(l, idx=i + 1)

# Both heads branch off the same tower output.
policy = policy_head(l)
value = value_head(l)

# Two-output model: [policy (softmax over N_ACTIONS), value (single tanh unit)].
alphabot = Model(input_layer, [policy, value])
alphabot.summary(line_length=112)

________________________________________________________________________________________________________________
Layer (type)                        Output Shape             Param #       Connected to                         
input_1 (InputLayer)                (None, 16, 16, 4)        0                                                  
________________________________________________________________________________________________________________
conv (Conv2D)                       (None, 16, 16, 128)      4736          input_1[0][0]                        
________________________________________________________________________________________________________________
conv_bn (BatchNormalization)        (None, 16, 16, 128)      512           conv[0][0]                           
________________________________________________________________________________________________________________
conv_relu (Activation)              (None, 16, 16, 128)      0             conv_bn[0][0]        

In [23]:
# Create an emulator game for `n_players` players and call it once with
# `actions`, unpacking the five values it returns.
# NOTE(review): `actions` is not defined in any cell visible here -- confirm it
# is created before this cell on a fresh Restart & Run All.
n_players = 2
game = emulator.Game(n_players)
gmap, p_alive, n_alive, rewards, game_end = game(actions)