In [1]:
import itertools
import os
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.transform
import tensorflow as tf

In [2]:
# Report whether TensorFlow can see a GPU.
# NOTE(review): newer TensorFlow releases deprecate tf.test.is_gpu_available in
# favour of tf.config.list_physical_devices("GPU") — confirm against the
# installed version before switching.
tf.test.is_gpu_available()

True

## Load training data

Load the data and create shifted copies of every example.  
The shifted copies are not redundant even though the model is convolutional: the convolutions are unpadded and the grid is very small (5×5), so boundary effects make the activations depend on where a pattern sits.

In [3]:
WIDTH = 5
HEIGHT = 5

# data.txt holds one example per blank-line-separated paragraph: the first
# line is the integer label, the remaining lines are the rows of the glyph,
# with "." for an unset pixel and any other character for a set pixel.
with open("data.txt") as f:
    t = f.read()

xs = []
ys = []

# Strip surrounding whitespace before splitting so that a trailing newline at
# the end of the file does not produce an empty row or an empty example.
for ex in t.strip().split("\n\n"):
    lines = ex.split("\n")
    y = int(lines[0])
    x = np.array([[0 if ch == "." else 1 for ch in line] for line in lines[1:]])

    # Extent of the glyph measured from the top-left corner: the largest
    # 1-based column / row index that contains a set pixel.
    ex_width = np.max(np.arange(1, WIDTH + 1) * x)
    ex_height = np.max((np.arange(1, HEIGHT + 1) * x.T).T)
    print(y, ex_width, ex_height, end="")
    # Emit one copy per offset that keeps that extent inside the grid.
    # np.roll wraps around, but with these offsets only rows/columns beyond
    # the glyph's extent (i.e. empty ones) ever cross the edge.
    for offset_x, offset_y in itertools.product(range(WIDTH - ex_width + 1),
                                                range(HEIGHT - ex_height + 1)):
        xs.append(np.roll(x, (offset_y, offset_x), axis=(0, 1)))
        ys.append(y)
        print(".", end="")
    print()

# Add a trailing channel axis and convert to float for the network input.
xs = np.array(xs).astype(float)[..., np.newaxis]
ys = np.array(ys)

0 3 3.........
0 3 3.........
0 4 4....
0 3 4......
0 3 4......
0 3 4......
0 3 5...
0 4 4....
0 4 5..
0 4 4....
0 4 5..
0 5 5.
1 1 5.....
1 1 4..........
1 1 3...............
1 3 3.........
1 2 4........
1 3 4......
1 3 4......
1 3 5...
1 3 4......
1 3 4......
2 5 5.
2 3 5...
2 3 3.........
2 3 5...
2 5 5.
2 4 5..
2 5 5.
2 5 5.
2 5 5.
2 3 4......
2 3 4......
2 3 5...
2 3 4......
3 3 3.........
3 2 3............
3 4 5..
3 4 5..
3 3 5...
3 2 5....
3 4 5..
3 5 5.
3 3 5...
3 5 5.
3 5 5.
3 4 5..
3 3 5...
3 3 5...
3 4 5..
3 4 5..
3 3 3.........
3 3 4......
3 3 4......
3 3 4......
4 4 5..
4 4 5..
4 5 5.
4 4 5..
4 4 5..
4 3 3.........
4 2 4........
4 2 3............
4 4 4....
4 4 4....
4 5 5.
4 4 5..
4 3 4......
4 3 4......
5 4 5..
5 4 5..
5 3 5...
5 3 5...
5 3 5...
5 4 5..
5 5 5.
5 2 4........
5 3 4......
5 3 3.........
5 3 4......
5 3 4......
5 3 4......
5 4 4....
5 2 5....
5 2 5....
5 2 5....
6 3 5...
6 3 4......
6 3 5...
6 3 3.........
6 2 3............
6 2 4........
6 3 5...
6 2 4.......

Number of (shifted) examples, in total and per class

In [4]:
# Total number of (shifted) training examples.
ys.shape[0]

601

In [5]:
# Number of (shifted) examples per label, most frequent label first.
label_counts = pd.Series(ys).value_counts()
label_counts

3    79
6    74
1    74
5    73
9    66
4    61
0    56
8    44
2    43
7    31
dtype: int64

In [6]:
# Show one randomly chosen example: its label, then its pixel grid as integers.
i = np.random.randint(len(ys))
print(ys[i])
print(xs[i, ..., 0].astype(int))

6
[[0 0 1 1 0]
 [0 1 0 0 0]
 [0 1 1 1 0]
 [0 1 1 1 0]
 [0 0 0 0 0]]


In [7]:
def random_flip(x: np.ndarray, xs_strings: set) -> np.ndarray:
    """Return a copy of x with one randomly chosen bit flipped.

    The flipped position is drawn uniformly from those positions whose flip
    yields a pattern that does not occur in xs_strings.

    Args:
        x: Binary pattern whose first two axes are (row, column); any
            trailing axes (e.g. a channel axis) are flipped together.
        xs_strings: Collection of raw-byte fingerprints (``ndarray.tobytes()``)
            of the patterns that must not be produced. The fingerprints must
            come from arrays with the same shape and dtype as x.

    Returns:
        A new array; x itself is left unmodified.

    Raises:
        ValueError: If every single-bit flip of x occurs in xs_strings.
    """
    height, width = x.shape[:2]
    # Visiting each position once in random order picks uniformly among the
    # admissible flips, like rejection sampling, but is guaranteed to stop.
    for flat_pos in np.random.permutation(height * width):
        pos_y, pos_x = divmod(int(flat_pos), width)
        x_new = x.copy()
        x_new[pos_y, pos_x] = 1 - x_new[pos_y, pos_x]
        # tobytes() is the supported spelling of the deprecated tostring()
        # and produces the same bytes.
        if x_new.tobytes() not in xs_strings:
            return x_new
    raise ValueError("every single-bit flip of x occurs in xs_strings")

In [8]:
# Raw-byte fingerprints of every training example; passed to random_flip so
# that augmented patterns never coincide with a real example.
# ndarray.tobytes() replaces the deprecated tostring() alias (same bytes).
xs_strings = {x.tobytes() for x in xs}

In [20]:
BATCH_SIZE = 512

# Pair every example with its label, then shuffle over the whole set and
# repeat indefinitely.
features_ds = tf.data.Dataset.from_tensor_slices(xs)
labels_ds = tf.data.Dataset.from_tensor_slices(ys)
ds = tf.data.Dataset.zip((features_ds, labels_ds))
ds = ds.shuffle(buffer_size=len(ys))
ds = ds.repeat()

def augment(x: np.ndarray) -> np.ndarray:
    """Apply between zero and three random bit flips to x.

    Three uniform draws are made in turn; a flip is applied whenever the
    draw exceeds 0.6, 0.85 and 0.95 respectively.
    """
    for threshold in (0.6, 0.85, 0.95):
        if np.random.uniform() > threshold:
            x = random_flip(x, xs_strings)

    return x
    

def map_fn(x: tf.Tensor, y: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    """Run the NumPy-based augment on x inside the tf.data pipeline.

    The static shape is set explicitly on the tensor returned by
    tf.numpy_function. The label y is passed through unchanged.
    """
    augmented = tf.numpy_function(func=augment, inp=[x], Tout=tf.float64)
    augmented.set_shape([HEIGHT, WIDTH, 1])
    return augmented, y

# Augment each element (8 parallel calls), then group into training batches.
ds = ds.map(map_fn, num_parallel_calls=8).batch(BATCH_SIZE)


In [21]:
# Tiny convolutional classifier: three unpadded 2x2 convolutions, global max
# pooling over the remaining spatial positions, and a 10-way softmax layer.
model = tf.keras.Sequential([
    # Inputs are laid out (rows, columns, channels), i.e. (HEIGHT, WIDTH, 1),
    # the same order map_fn uses in set_shape; (WIDTH, HEIGHT, 1) only worked
    # because both constants are equal.
    tf.keras.layers.Conv2D(4, 2, activation="relu", padding="valid", input_shape=(HEIGHT, WIDTH, 1)),
    tf.keras.layers.Conv2D(8, 2,  activation="relu"),
    tf.keras.layers.Conv2D(16, 2,  activation="relu"),
    tf.keras.layers.GlobalMaxPooling2D(),
    tf.keras.layers.Dense(10, activation=tf.keras.activations.softmax)
])

In [22]:
# Total number of weights in the network.
model.count_params()

854

In [23]:
# Adam optimizer, sparse categorical cross-entropy on the integer labels,
# accuracy reported as the only metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0025),
    loss=tf.keras.losses.sparse_categorical_crossentropy,
    metrics=["accuracy"],
)

In [24]:
# Training is cheap, so be generous: 50 rounds of 100 epochs each. After every
# round, print [loss, accuracy] measured on the training examples without
# random bit flips.
steps_per_epoch = len(ys) // BATCH_SIZE
for i in range(50):
    model.fit(ds, epochs=100, verbose=0, steps_per_epoch=steps_per_epoch)
    round_metrics = model.evaluate(xs, ys, verbose=0)
    print(round_metrics)

[1.5875214978978163, 0.4342762]
[0.8785768717576977, 0.70216304]
[0.5461915427952162, 0.84858567]
[0.41655132040604576, 0.8935108]
[0.34321776409315785, 0.9267887]
[0.3063359028487753, 0.9267887]
[0.27414292345031127, 0.9384359]
[0.24537700717143726, 0.953411]
[0.2250930157655884, 0.96339434]
[0.2110891010915975, 0.96672213]
[0.19848495786281276, 0.96339434]
[0.18753172008447758, 0.96672213]
[0.17774632945036928, 0.9733777]
[0.17692251595205158, 0.97504157]
[0.16352415493938965, 0.9800333]
[0.1592432604157389, 0.97836936]
[0.14995828094875158, 0.97504157]
[0.14775649323042936, 0.98169714]
[0.14361647222680776, 0.9800333]
[0.13877129490284276, 0.98336107]
[0.13671101800812263, 0.98668885]
[0.1368089908619292, 0.98668885]
[0.1299950772285858, 0.98336107]
[0.12354691150581182, 0.99001664]
[0.1317159654495125, 0.98668885]
[0.11858940300548731, 0.9916805]
[0.11993231206586873, 0.99001664]
[0.11718819048658584, 0.9916805]
[0.1229554431361089, 0.99001664]
[0.10639031774390756, 0.9933444]
[0.1

In [25]:
# Lower the learning rate (from 0.0025 to 0.0005) for the next training stage.
# `learning_rate` is the canonical optimizer attribute; `lr` is a legacy alias.
model.optimizer.learning_rate.assign(0.0005)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=0.0005>

In [26]:
# Second stage at the lower learning rate: 20 rounds of 100 epochs each, with
# [loss, accuracy] on the unflipped training examples printed after every round.
steps_per_epoch = len(ys) // BATCH_SIZE
for i in range(20):
    model.fit(ds, epochs=100, verbose=0, steps_per_epoch=steps_per_epoch)
    round_metrics = model.evaluate(xs, ys, verbose=0)
    print(round_metrics)

[0.08378180686675768, 0.9983361]
[0.08266442559175999, 0.9966722]
[0.08259089386552423, 0.9966722]
[0.08277329469034557, 0.9966722]
[0.08343032980570182, 0.9966722]
[0.08216995145883814, 0.9966722]
[0.08163688189277236, 0.9966722]
[0.08198392936671633, 0.9966722]
[0.08212041901966895, 0.9966722]
[0.08243998907469274, 0.9983361]
[0.08120984948564092, 0.9983361]
[0.0825885234527699, 0.9966722]
[0.08261346446347118, 0.9983361]
[0.081855698180278, 0.9983361]
[0.0811349679398259, 0.9983361]
[0.08232082062077006, 0.9983361]
[0.08068347562370999, 0.9983361]
[0.08148023616305604, 0.9966722]
[0.08137531521514728, 0.9983361]
[0.08173648129138296, 0.9983361]


In [27]:
# Final stage: drop the learning rate to 0.00005 and train 5 more rounds of
# 100 epochs, printing [loss, accuracy] on the unflipped examples each round.
# `learning_rate` is the canonical optimizer attribute; `lr` is a legacy alias.
model.optimizer.learning_rate.assign(0.00005)
for i in range(5):
    model.fit(ds, epochs=100, verbose=0, steps_per_epoch=len(ys) // BATCH_SIZE)
    print(model.evaluate(xs, ys, verbose=0))

[0.08067461735446521, 0.9983361]
[0.08020514459310474, 0.9983361]
[0.08047649403181727, 0.9983361]
[0.08000410159742971, 0.9983361]
[0.08002653043599375, 0.9983361]


In [28]:
# [loss, accuracy] on batches drawn from the augmented pipeline, i.e. with the
# random bit flips applied.
eval_steps = len(ys) // BATCH_SIZE
model.evaluate(ds, steps=eval_steps)



[0.46074575185775757, 0.8691406]

In [29]:
# [loss, accuracy] on the training examples without random bit flips.
clean_metrics = model.evaluate(xs, ys, verbose=0)
print(clean_metrics)

[0.08002653043599375, 0.9983361]


In [36]:
# Training examples the model still misclassifies, shown as 2-D grids.
# Sequential.predict_classes has been removed from newer Keras releases; the
# argmax over the softmax outputs yields the same class indices.
predicted_classes = np.argmax(model.predict(xs), axis=-1)
xs[predicted_classes != ys][..., 0]

array([[[1., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 1., 1., 0., 0.],
        [0., 0., 0., 0., 0.]]])

In [37]:
# Persist the trained model to disk under this path.
model.save("5x5_4-8-16_filters.savedmodel")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: 5x5_4-8-16_filters.savedmodel\assets


In [3]:
# Reload the previously saved model from disk, replacing `model`.
model = tf.keras.models.load_model("5x5_4-8-16_filters.savedmodel/")

In [4]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 4, 4, 4)           20        
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 3, 3, 8)           136       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 2, 2, 16)          528       
_________________________________________________________________
global_max_pooling2d (Global (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 10)                170       
Total params: 854
Trainable params: 854
Non-trainable params: 0
_________________________________________________________________


In [38]:
# Export the parameters of every trainable layer as .npy files under weights/.
# np.save does not create missing directories, so make sure the target exists.
os.makedirs("weights", exist_ok=True)

# Layers 0-2 are the three Conv2D layers; each carries a (kernel, bias) pair.
for i in range(3):
    base_name = f"weights/conv{i}_{{}}.npy"
    kernel, bias = model.layers[i].weights
    np.save(base_name.format("kernel"), kernel.numpy())
    np.save(base_name.format("bias"), bias.numpy())

# Layer 3 is the parameter-free pooling layer; layer 4 is the Dense classifier.
kernel, bias = model.layers[4].weights
np.save("weights/dense_kernel.npy", kernel.numpy())
np.save("weights/dense_bias.npy", bias.numpy())

In [9]:
# Hand-written 5x5 probe pattern, expanded with a leading batch axis and a
# trailing channel axis and stored as float32.
pattern = [
    [0, 0, 0, 0, 0],
    [1, 1, 1, 0, 0],
    [1, 1, 1, 0, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 0],
]
test_input = np.asarray(pattern).astype(np.float32)[np.newaxis, ..., np.newaxis]

In [10]:
# Run the model on the hand-written probe pattern; the result is the softmax
# output over the 10 classes.
model(test_input)

<tf.Tensor: id=1215, shape=(1, 10), dtype=float32, numpy=
array([[4.7821915e-05, 1.0191064e-03, 5.8183535e-03, 9.3612295e-01,
        2.4241449e-06, 7.1806074e-03, 3.5705147e-04, 9.8084245e-05,
        3.8822528e-02, 1.0531087e-02]], dtype=float32)>

In [39]:
# 99th percentile of the first convolution's activations over all examples.
conv0_out = model.layers[0](xs)
np.percentile(conv0_out.numpy(), 99)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



3.466181516647339

In [20]:
# Smallest activation produced by the first convolution over all examples.
np.min(model.layers[0](xs).numpy())

0.0

In [40]:
# 99th percentile of the activations after the first two layers.
activations = xs
for layer in model.layers[:2]:
    activations = layer(activations)
np.percentile(activations.numpy(), 99)

7.8704609870910645

In [41]:
# 99th percentile of the activations after the first three layers.
activations = xs
for layer in model.layers[:3]:
    activations = layer(activations)
np.percentile(activations.numpy(), 99)

14.721277723312408

In [42]:
# 99th percentile of the activations after the first four layers, i.e. of the
# globally max-pooled features that feed the final Dense layer.
activations = xs
for layer in model.layers[:4]:
    activations = layer(activations)
np.percentile(activations.numpy(), 99)

17.385940551757812