# s{t} = [X{t}, Y{t}, X{t−1}, Y{t−1},..., X{t−7}, Y{t−7}, C].
The input features s{t} are processed by a residual tower that consists of a single convolutional block 
followed by either 19 or 39 residual blocks.

The convolutional block applies the following modules:
(1) A convolution of 256 filters of kernel size 3 × 3 with stride 1
(2) Batch normalization
(3) A rectifier nonlinearity

Each residual block applies the following modules sequentially to its input: 
(1) A convolution of 256 filters of kernel size 3 × 3 with stride 1
(2) Batch normalization
(3) A rectifier nonlinearity
(4) A convolution of 256 filters of kernel size 3 × 3 with stride 1
(5) Batch normalization
(6) A skip connection that adds the input to the block
(7) A rectifier nonlinearity

The output of the residual tower is passed into two separate ‘heads’ for computing the policy and value. 

The policy head applies the following modules: 
(1) A convolution of 2 filters of kernel size 1 × 1 with stride 1
(2) Batch normalization
(3) A rectifier nonlinearity
(4) A fully connected linear layer that outputs a vector of size 19*19 + 1 = 362, corresponding to logit probabilities for all intersections and the pass move

The value head applies the following modules:
(1) A convolution of 1 filter of kernel size 1 × 1 with stride 1 
(2) Batch normalization
(3) A rectifier nonlinearity
(4) A fully connected linear layer to a hidden layer of size 256
(5) A rectifier nonlinearity
(6) A fully connected linear layer to a scalar
(7) A tanh nonlinearity outputting a scalar in the range [−1, 1]

The overall network depth, in the 20­ or 40­ block network, is 39 or 79 parameterized layers, respectively, 
for the residual tower, plus an additional 2 layers for the policy head and 3 layers for the value head.
"""


In [21]:
import keras
from keras.layers import Activation, BatchNormalization
from keras.layers import Conv2D, Dense, Flatten, Input
from keras.models import Model
import numpy as np
import time


class trojanGoZero:
    def __init__(self, num_resnet_block=19):
        #self.board_input = Input(shape=TrojanGoPlane.shape(), name='board_input')
        self.board_input = Input(shape=(7, 5, 5), name='board_input')
        self.num_resnet_block = num_resnet_block
        self.num_filters = 256

    
    def resNetBlock(self, x,filters,pool=False):
        res = x
        
        if pool:
            x = MaxPooling2D(pool_size=(2, 2))(x)
            res = Conv2D(filters=filters,kernel_size=[1,1],strides=(2,2),padding="same", data_format='channels_first')(res)
        #out = BatchNormalization()(x)
        #out = Activation("relu")(out)
            
        out = Conv2D(filters=filters, kernel_size=[3, 3], strides=[1, 1], padding="same", data_format='channels_first')(x)
        out = BatchNormalization()(out)
        out = Activation("relu")(out)
        
        out = Conv2D(filters=filters, kernel_size=[3, 3], strides=[1, 1], padding="same", data_format='channels_first')(out)
        out = BatchNormalization()(out)
        
        out = keras.layers.add([res,out])
        out = Activation("relu")(out)

        return out


        
    def nn_model(self, input_shape):
        #Input feature of 17*19*19 or 7*5*5 as board_input or board_images
        
        board_images = Input(input_shape)
        #board_images = self.board_input

        #CNN-1 with Batch Normalization and rectifier nonlinearity.
        cnn1 = Conv2D(filters=256, kernel_size=[3, 3], strides=[1, 1], padding="same", data_format='channels_first')(board_images)
        cnn1_batch = BatchNormalization()(cnn1)
        cnn1_act = Activation("relu")(cnn1_batch)

        self_in = cnn1_act
        
        #Now build 19 or 39 ResNet block networks depends on "num_resnet_block" variable.
        for i in range(self.num_resnet_block):
            self_out = self.resNetBlock(self_in, self.num_filters)
            self_in = self_out

        out = self_out
        
        policy_conv = \
            Conv2D(2, (1, 1),                          # <2>
                data_format='channels_first',          # <2>
                activation='relu')(out)                # <2>
        policy_conv_bn = BatchNormalization()(policy_conv)
        policy_flat = Flatten()(policy_conv_bn)           # <2>
        policy_output = \
            Dense(25,
                  activation='softmax')(policy_flat)   # <2>




        value_conv = \
            Conv2D(1, (1, 1),                                    # <3>
                data_format='channels_first',                    # <3>
                activation='relu')(out)                          # <3>
        value_conv_bn = BatchNormalization()(value_conv)
        value_flat = Flatten()(value_conv_bn)                       # <3>
        value_hidden = Dense(256, activation='relu')(value_flat) # <3>
        value_output = Dense(1, activation='tanh')(value_hidden) # <3>

        model = Model(
            inputs=[board_images],
            outputs=[policy_output, value_output])

        return model


In [22]:
net = trojanGoZero()
input_shape = (7,5,5)
model = net.nn_model(input_shape)
print(model.summary())
    

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 7, 5, 5)      0                                            
__________________________________________________________________________________________________
conv2d_83 (Conv2D)              (None, 256, 5, 5)    16384       input_3[0][0]                    
__________________________________________________________________________________________________
batch_normalization_80 (BatchNo (None, 256, 5, 5)    20          conv2d_83[0][0]                  
__________________________________________________________________________________________________
activation_79 (Activation)      (None, 256, 5, 5)    0           batch_normalization_80[0][0]     
____________________________________________________________________________________________

In [23]:

model_input = []

for _ in range(100):
    board_tensor = np.random.randint(0, 3, size=(7, 5, 5))
    model_input.append(board_tensor)

model_input = np.array(model_input) 
 

action_target = []
for _ in range (100):
    search_prob = np.random.randn(5,5)
    search_prob_flat = search_prob.reshape(25,)
    action_target.append(search_prob_flat)
    
action_target = np.array(action_target)    


value_target = np.random.rand(100)
value_target = np.array(value_target) 




In [24]:
#pickle the data
import pickle 
def storeData():
    start = time.time()
    # database 
    db = {} 
    db['model_input'] = model_input 
    db['action_target'] = action_target 
    db['value_target'] = value_target 
      
    # Its important to use binary mode 
    dbfile = open('examplePickle', 'ab') 
      
    # source, destination 
    pickle.dump(db, dbfile)                      
    dbfile.close() 
    finish = time.time()
    print("Write Time taken :", finish - start)

def loadData(): 
    start = time.time()
    # for reading also binary mode is important 
    dbfile = open('examplePickle', 'rb')      
    db = pickle.load(dbfile) 
    """
    for keys in db: 
        print(keys, '=>', db[keys]) 
    """    
    dbfile.close() 
    finish = time.time()
    print("Read Time taken :", finish - start)
 
storeData()
loadData()

Write Time taken : 0.0016620159149169922
Read Time taken : 0.6699378490447998


In [25]:
#use HDF5 to store and load
import h5py


class ExperienceBuffer:
    def __init__(self, model_input, action_target, value_target):
        self.model_input = model_input
        self.action_target = action_target
        self.value_target = value_target
        
    def serialize(self, h5file):
        h5file.create_group('experience')
        h5file['experience'].create_dataset('model_input', data=self.model_input)
        h5file['experience'].create_dataset('action_target', data=self.action_target)
        h5file['experience'].create_dataset('value_target', data=self.value_target)

    
    def load_experience(self, h5file):
        return ExperienceBuffer(model_input=np.array(h5file['experience']['model_input']),
                               action_target=np.array(h5file['experience']['action_target']),
                               value_target=np.array(h5file['experience']['value_target'])
                               )

start = time.time() 
with h5py.File('test.hdf5', 'w') as exp_outf:
    ExperienceBuffer(model_input, action_target, value_target).serialize(exp_outf)
    
finish = time.time()
print("Write Time taken :", finish - start)  


start = time.time() 
with h5py.File('test.hdf5', 'r') as exp_input:
    experience_buffer = ExperienceBuffer(model_input, action_target, value_target).load_experience(exp_input)

finish = time.time()
print("Read Time taken :", finish - start)     

Write Time taken : 0.004450082778930664
Read Time taken : 0.0031900405883789062


In [26]:
from keras.optimizers import SGD
model.compile(SGD(lr=0.01), loss=['categorical_crossentropy', 'mse'])

In [27]:
import time
start = time.time()
model.fit(model_input, [action_target, value_target], batch_size=64, epochs=1)
finish = time.time()
print("Time taken : ", finish - start)

Epoch 1/1
Time taken :  35.297181844711304


In [28]:
X = model_input[0]
X = np.expand_dims(X, axis=0)
print(X.shape)
prediction = model.predict(X)
print(prediction)


(1, 7, 5, 5)
[array([[3.21205844e-30, 4.52979167e-25, 1.63890921e-28, 2.46900555e-07,
        1.23443059e-03, 2.92117386e-18, 3.61564889e-06, 5.21536460e-12,
        3.65150047e-17, 6.40434374e-13, 3.10723808e-10, 2.85036385e-01,
        1.30185085e-36, 4.21520836e-20, 4.97761403e-06, 7.14626068e-33,
        1.71372412e-05, 6.64920272e-11, 3.01236147e-03, 7.04783201e-01,
        6.56724873e-23, 1.28870579e-05, 5.89487003e-03, 5.09547574e-30,
        3.05133518e-08]], dtype=float32), array([[0.5846347]], dtype=float32)]


In [29]:
index = np.argmax(prediction[0])
rows = int(index/5)
cols = index%5
print("Move : ", (rows, cols))
print("Win chance :", prediction[1])

Move :  (3, 4)
Win chance : [[0.5846347]]


In [30]:
import tensorflow as tf
filepath = 'model'
#save the model
model.save('model')


In [31]:
"""
import tensorflow as tf
#load the model
loaded_model = tf.keras.models.load_model('model')
"""

"\nimport tensorflow as tf\n#load the model\nloaded_model = tf.keras.models.load_model('model')\n"