In [2]:
import random
import numpy as np
import tensorflow as tf
print(tf.__version__)

2.8.0


## Synthetic data

We create a set of *boards* of size $5 \times 5$ with 3 channels for each position. The label at each position $(j,k)$ is computed as a function of the channel values $v_{jkl}$ at that position.

$$
    L_{jk} = \sum_{l=0}^2 a_l \cdot (v_{jkl})^{l+1}
$$

Here, $a_l$ denote arbitrary coefficients defined below. Note that this function is the same for every position. Thus, a sufficiently deep convolutional network with only $1 \times 1$ kernels should easily learn this function by simultaneously looking at all the positions of any given training board.

In [3]:
def create_data(n):
    _batch = np.zeros([n,5,5,3])
    _labels = np.zeros([n,5,5,1])
    _a=[.9, .3, -.2]
    for _i in range(n):
        for _x in range(5):
            for _y in range(5):
                for _l in range(3):
                    _v = 2*(random.random()-0.5)
                    _batch[_i][_x][_y][_l] = _v
                    _labels[_i][_x][_y][0] += _a[_l] * _v**(_l+1)
    return _batch, _labels

In [20]:
N = 100
train, labels = create_data(N)

In [21]:
train.shape

(100, 5, 5, 3)

Smartly rearranging the dimensions of the first *board* of the batch shows the three $5 \times 5$ channels

In [22]:
print(np.rollaxis(train[0], 2, 0))

[[[ 0.0325794   0.71461428 -0.67176429 -0.24582805  0.7415726 ]
  [ 0.0492563   0.42333726  0.29222109 -0.4912165  -0.90514223]
  [-0.85357304  0.74491648  0.0648897  -0.23212615 -0.23138911]
  [-0.74232668  0.5381589  -0.62301182 -0.41845993 -0.45040138]
  [-0.18861596  0.61375406 -0.74924854  0.02341287  0.17109993]]

 [[ 0.92173151  0.95110107  0.96829689 -0.66384309 -0.89470533]
  [ 0.55667643  0.54301528 -0.57049661 -0.93937451  0.87839482]
  [-0.10195519 -0.94672435  0.2829392   0.72431629  0.30002016]
  [-0.71424886 -0.98877846  0.89144974  0.13796427 -0.09204407]
  [-0.25205293 -0.73964015 -0.63551836 -0.07581218  0.92247503]]

 [[-0.15222887 -0.82176197 -0.50881642 -0.19480477 -0.73554699]
  [ 0.36226702  0.25068967  0.75864517 -0.91850933  0.80975499]
  [ 0.34874026 -0.39289979  0.28578129  0.08754223 -0.81253089]
  [ 0.06483587 -0.45779226 -0.89521371 -0.11710265  0.19919398]
  [-0.14505716  0.36599129 -0.5821453  -0.74212491  0.07906569]]]


## TF 2.8

In [24]:
tf_train = tf.data.Dataset.from_tensor_slices(train)
tf_train

<TensorSliceDataset element_spec=TensorSpec(shape=(5, 5, 3), dtype=tf.float64, name=None)>

In [25]:
tf_labels = tf.data.Dataset.from_tensor_slices(labels)
tf_labels

<TensorSliceDataset element_spec=TensorSpec(shape=(5, 5, 1), dtype=tf.float64, name=None)>

In [32]:
train_input = tf.data.Dataset.zip((tf_train, tf_labels)).batch(10)

In [33]:
train_data, train_labels = next(iter(train_input))

In [34]:
train_data

<tf.Tensor: shape=(10, 5, 5, 3), dtype=float64, numpy=
array([[[[ 3.25794011e-02,  9.21731507e-01, -1.52228868e-01],
         [ 7.14614281e-01,  9.51101070e-01, -8.21761966e-01],
         [-6.71764294e-01,  9.68296894e-01, -5.08816421e-01],
         [-2.45828055e-01, -6.63843089e-01, -1.94804770e-01],
         [ 7.41572602e-01, -8.94705335e-01, -7.35546987e-01]],

        [[ 4.92562961e-02,  5.56676429e-01,  3.62267017e-01],
         [ 4.23337261e-01,  5.43015283e-01,  2.50689674e-01],
         [ 2.92221091e-01, -5.70496612e-01,  7.58645174e-01],
         [-4.91216504e-01, -9.39374509e-01, -9.18509331e-01],
         [-9.05142235e-01,  8.78394815e-01,  8.09754992e-01]],

        [[-8.53573044e-01, -1.01955190e-01,  3.48740256e-01],
         [ 7.44916477e-01, -9.46724347e-01, -3.92899786e-01],
         [ 6.48897008e-02,  2.82939196e-01,  2.85781292e-01],
         [-2.32126145e-01,  7.24316293e-01,  8.75422291e-02],
         [-2.31389110e-01,  3.00020165e-01, -8.12530891e-01]],

        [

In [35]:
train_labels

<tf.Tensor: shape=(10, 5, 5, 1), dtype=float64, numpy=
array([[[[ 0.28490369],
         [ 1.0255168 ],
         [-0.29696228],
         [-0.08756043],
         [ 0.98715514]],

        [[ 0.12778866],
         [ 0.46631228],
         [ 0.27331239],
         [-0.02238571],
         [-0.68934656]],

        [[-0.77358002],
         [ 0.95144133],
         [ 0.0777491 ],
         [-0.05165748],
         [-0.07395894]],

        [[-0.51510309],
         [ 0.79683612],
         [-0.17881964],
         [-0.37058253],
         [-0.40440035]],

        [[-0.15008472],
         [ 0.70669404],
         [-0.5137016 ],
         [ 0.10454079],
         [ 0.40917914]]],


       [[[ 0.04515984],
         [ 0.47206015],
         [ 0.70314546],
         [-0.162142  ],
         [ 0.8889798 ]],

        [[-0.13042347],
         [ 0.42827369],
         [ 0.7068355 ],
         [-0.71290531],
         [-0.54962645]],

        [[ 0.33355867],
         [ 0.31582773],
         [-0.28234204],
         [-0.2268

In [None]:
from tensorflow.feature_column import numeric_column
from tensorflow.keras import datasets, layers, models

In [16]:
# conv1 = tf.keras.conv2d(inputs=tf_batch, filters=32, kernel_size=[1,1], strides=[1,1], padding='VALID', activation=tf.nn.elu)
conv1 = tf.keras.layers.Conv2D(32, (1,1), activation=tf.nn.elu)(tf_batch)

TypeError: Inputs to a layer should be tensors. Got: <TensorSliceDataset element_spec=TensorSpec(shape=(5, 5, 3), dtype=tf.float64, name=None)>

---

## Deprecated TF code

In [None]:
tf_inputs = tf.placeholder(tf.float32, [None, 5, 5, 3])
tf_labels = tf.placeholder(tf.float32, [None, 5, 5, 1])

### A special CNN
The convolutional network below can actually be regarded as a single convolutional layer with the kernel itself being a 5-layer feed-forward NN with layers $[3, 8, 32, 32, 1]$.

In [None]:
conv1 = tf.layers.conv2d(inputs=tf_inputs, filters=32, kernel_size=[1,1], strides=[1,1], padding='VALID', activation=tf.nn.elu)
conv2 = tf.layers.conv2d(inputs=conv1, filters=128, kernel_size=[1,1], strides=[1,1], padding='VALID', activation=tf.nn.elu)
conv3 = tf.layers.conv2d(inputs=conv2, filters=32, kernel_size=[1,1], strides=[1,1], padding='VALID', activation=tf.nn.elu)
conv4 = tf.layers.conv2d(inputs=conv3, filters=1, kernel_size=[1,1], strides=[1,1], padding='VALID')

loss = tf.losses.mean_squared_error(tf_labels,conv4)
optimizer = tf.train.AdamOptimizer(learning_rate=3e-4).minimize(loss)

### Training
We train the network and compute training loss and test loss once in a while

In [None]:
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for i in range(3001):
        _ = session.run(optimizer, feed_dict={_inputs: batch, _labels: labels})
        if i % 1000 == 0:
            l = session.run(loss, feed_dict={_inputs: batch, _labels: labels})
            l_t = session.run(loss, feed_dict={_inputs: batch_t, _labels: labels_t})
            print(l, l_t)

From the smooth convergence also on the test set we can see that the network has indeed learned our label function.