In [1]:
from __future__ import division, absolute_import, print_function

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import numpy as np
import tensorflow as tf
tf.__version__

'1.13.1'

# Detecting lines of 5 with a ConvNet and hand-crafted features

In [4]:
# Four 5x5 line detectors, one per orientation:
# horizontal, vertical, main diagonal and anti-diagonal.
hor = np.zeros((5, 5), dtype=float)
hor[2, :] = 1                        # centre row filled -> horizontal line
diag = np.identity(5, dtype=float)   # main diagonal
filters = np.stack((hor, hor.transpose(), diag, np.flipud(diag)))
# Move the filter index to the last axis so the kernel constant is channels-last.
kernel_init = tf.constant_initializer(np.moveaxis(filters, 0, -1))
# Every filter holds five ones, so on a binary board a bias of -4 followed by
# a ReLU leaves a positive response only where all five cells are occupied.
bias_init = tf.constant_initializer(-4.)

## Take particular note of the shape: Channels last
kernel_init.value.shape

(5, 5, 4)

### Verifying the function with some examples

In [5]:
# Six 10x10 sample boards, each holding exactly one line of five stones.
boards = np.zeros((6, 10, 10))
line_offsets = np.arange(5)
boards[0, 5, 3 + line_offsets] = 1.                  # horizontal line in row 5
boards[1, 3 + line_offsets, 5] = 1.                  # vertical line in column 5
boards[2, 8 - line_offsets, 3 + line_offsets] = 1.   # anti-diagonal line
for board_no in (3, 4, 5):                           # three identical main-diagonal boards
    boards[board_no, 2 + line_offsets, 2 + line_offsets] = 1.
boards[0]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 1., 1., 1., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [6]:
# Feed the six sample boards as one constant batch of shape (6, 10, 10, 1).
inp = tf.constant(boards.reshape(-1, 10, 10, 1))
out = tf.layers.conv2d(
    inputs=inp,
    filters=4,
    kernel_size=5,
    padding='same',
    activation='relu',
    kernel_initializer=kernel_init,
    bias_initializer=bias_init,
)
# Pooling over the whole 10x10 map keeps one value per sample and filter.
out = tf.layers.max_pooling2d(inputs=out, pool_size=10, strides=1)

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    res = sess.run(out)
# Bring the filter axis to the front and drop the singleton axes for display.
res = np.moveaxis(res, -1, 0).squeeze()
print(res)

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
[[1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 1. 1.]
 [0. 0. 1. 0. 0. 0.]]


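Each column of the printed matrix is one of the six samples and each row is one of the four filters (horizontal, vertical, diagonal, anti-diagonal). You see that every sample has a $1$ at the position that corresponds to the particular pattern that has been recognized.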

In [7]:
# Sum over the filter axis: one total per sample.
res.sum(axis=0)

array([1., 1., 1., 1., 1., 1.])

### Creating labels with the hand-crafted features

The *labels* graph maps each sample that contains a line of 5 to a $1$, and all others to a $0$.

In [8]:
# Heuristic labelling graph: the hand-crafted line filters followed by a global
# max-pool give one response per filter; a sample is labelled 1 if any filter fired.
inp_heuristics = tf.placeholder(name="inp_heuristics", shape=[None, 10, 10, 1], dtype=tf.float32)
out = tf.layers.conv2d(kernel_size=5, kernel_initializer=kernel_init,
                       filters=4, inputs=inp_heuristics, padding='same',
                       bias_initializer=bias_init, activation='relu')
out = tf.layers.max_pooling2d(inputs=out, pool_size=10, strides=1)
# Squeeze only the two pooled spatial axes. A bare tf.squeeze would also drop the
# batch axis when a single sample is fed and return a scalar instead of a
# length-1 vector.
labels = tf.squeeze(tf.sign(tf.reduce_sum(out, axis=3)), axis=[1, 2])

In [9]:
# Five random boards; each cell is occupied with probability 0.3.
occupied = np.random.uniform(size=[5, 10, 10]) < .3
samples = occupied.astype(float).reshape(-1, 10, 10, 1)

In [10]:
# Evaluate the heuristic labels for the random samples in a fresh session.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    feed = {inp_heuristics: samples}
    res = sess.run(labels, feed_dict=feed)
res

array([1., 0., 0., 0., 0.], dtype=float32)

In [11]:
# Show the fourth sample with its channel axis moved to the front.
np.moveaxis(samples[3], -1, 0)

array([[[0., 1., 0., 1., 1., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 1., 1., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 1., 1., 0.],
        [0., 1., 0., 0., 0., 0., 1., 1., 1., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 1., 1.],
        [0., 0., 0., 1., 0., 0., 0., 0., 1., 0.]]])

In [12]:
def create_samples(sess, placeholder, n=100, density=.3, label_op=None):
    """
    Draw `n` random 10x10 boards and label them with the heuristic graph.

    Args:
        sess: session used to evaluate the labelling op.
        placeholder: input placeholder of the labelling graph; it is fed the boards.
        n: number of boards to create.
        density: probability that a cell is occupied.
        label_op: tensor producing one label per board. Defaults to the
            module-level `labels` tensor.

    Returns:
        (samples, lbls): `samples` is a float array of shape (n, 10, 10, 1) with
        entries 0. or 1.; `lbls` is a 1-D array with one label per sample.
    """
    if label_op is None:
        label_op = labels
    # Draw directly in the final shape so that n == 0 does not hit an ambiguous reshape.
    samples = (np.random.uniform(size=[n, 10, 10, 1]) < density).astype(float)
    lbls = sess.run(label_op, feed_dict={placeholder: samples})
    # Always return a vector, even if the labelling op collapsed a single-sample
    # batch to a scalar.
    lbls = np.reshape(lbls, [-1])
    return samples, lbls

### Combining ResNet and Inception Concepts

#### Design considerations
In contrast to DeepMind's network, I'm also using 5x5 filters in an inception [Ref] manner. I use blocks of 3 inception layers with skip connections between the blocks. The skip connections are 1x1 2-filter layers, so that each block's output is mapped into a feature map of 2 channels, which then skips the subsequent block. I'm using a single batch-normalization layer at the end of each block because I accept more risk of overfitting in favour of reducing noise. Gomoku is not about image recognition. The risk of overfitting is mitigated anyway by the zillions of synthetically created boards.

In [36]:
class ResNet:
    """
    Convolutional classifier for 10x10 boards built from stacked inception-style blocks.

    After sufficient training, this instance of ResNet takes an array of dimensions 10x10
    and returns 1 if the array contains the pattern you taught it to recognize.

    Attributes:
        inps: float32 placeholder of shape [None, 10, 10, 1] for the input boards.
        lbls: float32 placeholder of shape [None, 1] for the target labels.
        training: scalar bool placeholder with default False. Feed True in training
            steps so that batch normalization uses batch statistics and updates its
            moving averages; when it is not fed, the layers run in inference mode.
        out: predictions reshaped to [-1, 1]; the global maximum of a sigmoid map.
        errors: element-wise squared difference between `lbls` and `out`.
        accuracy: despite the name, the *number* (int64) of samples in the batch
            whose squared error is below 0.1, not a ratio.
        loss: mean squared error between `lbls` and `out`.
        optimizer: Adam optimizer with learning rate 1e-4.
        trainer: op that minimizes `loss` and also runs the moving-average updates
            of this instance's batch-normalization layers.
    """
    def __init__(self, n_blocks):
        """Build the graph with `n_blocks` consecutive residual blocks."""

        self.inps = tf.placeholder(name="inp_resnet", shape=[None, 10, 10, 1], dtype=tf.float32)
        self.lbls = tf.placeholder(name="lbl_resnet", shape=[None, 1], dtype=tf.float32)
        self.training = tf.placeholder_with_default(False, shape=[], name="training_resnet")

        # Remember how many update ops already exist so that only the ones created
        # for this instance are attached to the trainer below.
        n_updates_before = len(tf.get_collection(tf.GraphKeys.UPDATE_OPS))

        out = self.inps

        for _ in range(n_blocks):
            out = self._res_block(out)

        # A 1x1 convolution maps the features to one sigmoid score per cell;
        # pooling over the full 10x10 map keeps the maximum score per board.
        out = tf.layers.conv2d(kernel_size=1, filters=1, inputs=out, padding='same', activation='sigmoid')
        self.out = tf.reshape(tf.layers.max_pooling2d(inputs=out, pool_size=10, strides=1), [-1, 1])

        self.errors = (self.lbls - self.out)**2
        self.accuracy = tf.reduce_sum(tf.cast(self.errors < .1, dtype=tf.int64))

        # Keyword arguments: the signature is (labels, predictions).
        self.loss = tf.losses.mean_squared_error(labels=self.lbls, predictions=self.out)
        self.optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)

        # Batch normalization registers its moving-average updates in UPDATE_OPS;
        # they only run if the train op depends on them.
        own_updates = tf.get_collection(tf.GraphKeys.UPDATE_OPS)[n_updates_before:]
        with tf.control_dependencies(own_updates):
            self.trainer = self.optimizer.minimize(self.loss)


    def _res_block(self, inp, filters=16, activation='elu'):
        """
        One block: three inception layers, batch normalization and a 1x1 skip path.

        Each inception layer applies a 3x3 and a 5x5 convolution with `filters`
        filters each to the same input and concatenates both along the channel axis.
        The block input is mapped to 2 channels by a linear 1x1 convolution and
        concatenated in front of the normalized result.

        Args:
            inp: input tensor; channels are expected on axis 3.
            filters: number of filters per convolution branch.
            activation: activation of the 3x3 and 5x5 convolutions.

        Returns:
            Tensor with 2 + 2 * filters channels.
        """
        out = inp
        for _ in range(3):
            branch_3 = tf.layers.conv2d(kernel_size=3, filters=filters, inputs=out, padding='same', activation=activation)
            branch_5 = tf.layers.conv2d(kernel_size=5, filters=filters, inputs=out, padding='same', activation=activation)
            out = tf.concat([branch_3, branch_5], axis=3)

        # Without `training` the layer would always run in inference mode.
        bn = tf.layers.batch_normalization(inputs=out, training=self.training)

        skip = tf.layers.conv2d(kernel_size=1, filters=2, inputs=inp, padding='same', activation=None)

        return tf.concat([skip, bn], axis=3)

In [37]:
# Build the network with ten blocks.
resnet = ResNet(n_blocks=10)

In [38]:
# Train on freshly generated, heuristically labelled boards and report the
# training loss plus a hold-out check every 1000 steps.
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for i in range(40001):
        # Fresh training batch; labels come from the heuristic graph.
        smp, lbl = create_samples(sess, inp_heuristics, 500)
        lbl = np.reshape(lbl, [-1, 1])
        l, o, _ = sess.run(
            [resnet.loss, resnet.out, resnet.trainer],
            feed_dict={resnet.inps: smp, resnet.lbls: lbl})
        if i % 1000 != 0:
            continue
        print("training loss %s: " % l)

        # Evaluate on 1000 unseen boards; resnet.accuracy is the number of
        # samples whose squared error is below 0.1.
        smp, lbl = create_samples(sess, inp_heuristics, 1000)
        lbl = np.reshape(lbl, [-1, 1])
        acc, pred, err = sess.run(
            [resnet.accuracy, resnet.out, resnet.errors],
            feed_dict={resnet.inps: smp, resnet.lbls: lbl})
        print("Accuracy %s" % acc)


training loss 0.40279025: 
Accuracy 20
training loss 0.12310071: 
Accuracy 661
training loss 0.091297686: 
Accuracy 800
training loss 0.03960142: 
Accuracy 903
training loss 0.030346967: 
Accuracy 959
training loss 0.014962292: 
Accuracy 983
training loss 0.0040859394: 
Accuracy 993
training loss 0.0024022518: 
Accuracy 992
training loss 0.00020789387: 
Accuracy 998
training loss 0.0021745246: 
Accuracy 976
training loss 0.0026933313: 
Accuracy 995
training loss 0.0006808436: 
Accuracy 998
training loss 0.00790388: 
Accuracy 991
training loss 0.0019695032: 
Accuracy 1000
training loss 5.5540622e-05: 
Accuracy 998
training loss 7.08283e-05: 
Accuracy 997
training loss 0.00023120517: 
Accuracy 1000
training loss 5.6351633e-05: 
Accuracy 999
training loss 0.0011512511: 
Accuracy 999
training loss 6.771554e-08: 
Accuracy 1000
training loss 3.6179044e-05: 
Accuracy 1000
training loss 1.6873728e-05: 
Accuracy 999
training loss 2.0926971e-07: 
Accuracy 1000
training loss 0.00035017464: 
Accur