In [None]:
from __future__ import absolute_import, print_function, division

In [None]:
import random
import numpy as np
import tensorflow as tf

## Synthetic data

We create a set of *boards* of size $5 \times 5$ with 3 channels for each position. The label at each position $(j,k)$ is computed as a function of the channel values $v_{jkl}$ at that position.

$$
    L_{jk} = \sum_{l=0}^2 a_l \cdot (v_{jkl})^{l+1}
$$

Here, $a_l$ denote arbitrary coefficients defined below. Note that this function is the same for every position. Thus, a sufficiently deep convolutional network with only $1 \times 1$ kernels should easily learn this function by simultaneously looking at all the positions of any given training board.

In [None]:
def create_data(N):
    """Generate ``N`` random boards together with their per-position labels.

    Every board is 5 x 5 with 3 channels per position. Channel values are
    drawn with ``random.random()`` and rescaled to the interval [-1, 1).
    The label of a position is ``0.9*v0 + 0.3*v1**2 - 0.2*v2**3``, where
    ``v0, v1, v2`` are the channel values at that position.

    Returns:
        A tuple ``(batch, labels)`` of NumPy arrays with shapes
        ``(N, 5, 5, 3)`` and ``(N, 5, 5, 1)``.
    """
    coefficients = (.9, .3, -.2)
    boards = np.zeros([N, 5, 5, 3])
    targets = np.zeros([N, 5, 5, 1])
    # np.ndindex visits (board, x, y) with the last index varying fastest,
    # i.e. in the same order as three nested range loops.
    for position in np.ndindex(N, 5, 5):
        for channel, coefficient in enumerate(coefficients):
            value = 2 * (random.random() - 0.5)
            boards[position + (channel,)] = value
            # Channel `channel` contributes coefficient * value^(channel + 1).
            targets[position + (0,)] += coefficient * value ** (channel + 1)
    return boards, targets

In [None]:
# Seed Python's `random` module (the generator create_data draws from) so that
# the training and test boards are the same on every Restart & Run All.
SEED = 42
random.seed(SEED)

N = 100  # number of boards in each data set
batch, labels = create_data(N)      # training set
batch_t, labels_t = create_data(N)  # test set: fresh draws from the same generator

In [None]:
# Expected: (N, 5, 5, 3) -- board index, the two board coordinates, channel
# (the shape create_data allocates for the batch).
batch.shape

Smartly rearranging the dimensions of the first *board* of the batch shows the three $5 \times 5$ channels

In [None]:
# Bring the channel axis (last) to the front: (5, 5, 3) -> (3, 5, 5),
# so each printed 5 x 5 matrix is one channel of the first board.
first_board_channels = np.moveaxis(batch[0], 2, 0)
print(first_board_channels)

In [None]:
# Graph inputs. The leading dimension is None so any number of boards can be fed.
# Boards: 5 x 5 positions with 3 channels each.
_inputs = tf.placeholder(dtype=tf.float32, shape=(None, 5, 5, 3))
# Targets: one label value per board position.
_labels = tf.placeholder(dtype=tf.float32, shape=(None, 5, 5, 1))

### A special CNN
The convolutional network below can actually be regarded as a single convolutional layer with the kernel itself being a 5-layer feed-forward NN with layers $[3, 32, 128, 32, 1]$.

In [None]:
# Graph-level seed: the layers created below draw their initial weights from
# seeds derived from it, so a fresh kernel reproduces the same initialisation.
tf.set_random_seed(0)

# Every layer is a 1x1 convolution with unit stride, so each board position is
# processed on its own by the same small feed-forward network.
_pointwise = dict(kernel_size=[1, 1], strides=[1, 1], padding='VALID')

# Channel widths per position: 3 -> 32 -> 128 -> 32 -> 1, ELU on the hidden layers.
conv1 = tf.layers.conv2d(inputs=_inputs, filters=32, activation=tf.nn.elu, **_pointwise)
conv2 = tf.layers.conv2d(inputs=conv1, filters=128, activation=tf.nn.elu, **_pointwise)
conv3 = tf.layers.conv2d(inputs=conv2, filters=32, activation=tf.nn.elu, **_pointwise)
# Linear output layer: one predicted label per position.
conv4 = tf.layers.conv2d(inputs=conv3, filters=1, **_pointwise)

# Mean squared error between the labels and the per-position predictions.
loss = tf.losses.mean_squared_error(_labels, conv4)
# Note: despite its name, `optimizer` is the training op returned by minimize().
optimizer = tf.train.AdamOptimizer(learning_rate=3e-4).minimize(loss)

### Training
We train the network on the full training set and report the training loss and the test loss every 1000 steps.

In [None]:
# The same two feeds are used at every step, so build them once.
train_feed = {_inputs: batch, _labels: labels}
test_feed = {_inputs: batch_t, _labels: labels_t}

with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for step in range(3001):
        # One full-batch optimisation step on the training boards.
        session.run(optimizer, feed_dict=train_feed)
        if step % 1000 != 0:
            continue
        # Every 1000 steps (including step 0): report training and test loss.
        train_loss = session.run(loss, feed_dict=train_feed)
        test_loss = session.run(loss, feed_dict=test_feed)
        print(train_loss, test_loss)

The loss converges smoothly on the test set as well as on the training set, which shows that the network has indeed learned our label function.