In [None]:
%load_ext autoreload
%autoreload 2

In [1]:
import numpy as np
import tensorflow as tf

from domoku.constants import BLACK, WHITE
from domoku.tools import GomokuTools as Gt

In [2]:
input_size=5

---

## Sampling Random Boards

In [3]:
from notebooks.ml_basics_recap.data import new_sample
sample = new_sample(board_size=input_size, num_blacks=20, num_whites=0)
sample.shape

(1, 5, 5, 2)

In [4]:
Gt.print_bin(sample, True)

shape: (5, 5, 2)
[[1. 1. 0. 1. 0.]
 [1. 0. 0. 0. 1.]
 [1. 0. 1. 0. 0.]
 [1. 1. 0. 0. 0.]
 [1. 0. 1. 1. 0.]]


---

## The Detection Map

In [5]:
from notebooks.ml_basics_recap.models.heuristic_detector import HeuristicDetector
detector = HeuristicDetector(input_size)

2022-05-26 17:58:21.894610: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
tf.floor(detector(sample)+.01)

<tf.Tensor: shape=(1, 5, 5), dtype=float32, numpy=
array([[[0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]], dtype=float32)>

---

## Batches From a Dataset

In [7]:
BATCH_SIZE=4

In [8]:
from notebooks.ml_basics_recap.data import new_sample, new_dataset
dataset = new_dataset(100, lambda: new_sample(input_size, 20, 0), detector).batch(BATCH_SIZE)

In [9]:
iterator  = iter(dataset)
states, labels = iterator.next()
labels.shape

TensorShape([4, 5, 5])

In [10]:
states.shape

TensorShape([4, 1, 5, 5, 2])

---

## The Trainable Model

In [11]:
from notebooks.ml_basics_recap.models import SimpleConvQFunction

model = SimpleConvQFunction(input_size, n_filters=8, n_layers=4)

In [12]:
print(model(states).shape)
model(states)

(4, 1, 5, 5)


<tf.Tensor: shape=(4, 1, 5, 5), dtype=float32, numpy=
array([[[[0.05512343, 0.05147012, 0.05102488, 0.05052979, 0.05499422],
         [0.0487574 , 0.04349193, 0.0436986 , 0.04213116, 0.04947419],
         [0.04903263, 0.04209143, 0.04278982, 0.04147002, 0.04853417],
         [0.04810731, 0.04098667, 0.04152723, 0.04087096, 0.04813401],
         [0.05221086, 0.04531613, 0.04578032, 0.04499815, 0.04941812]]],


       [[[0.05527663, 0.05151242, 0.0510808 , 0.05028641, 0.0552802 ],
         [0.04889812, 0.04266028, 0.04336601, 0.04198359, 0.04928682],
         [0.04885429, 0.04248419, 0.042679  , 0.04096809, 0.04832859],
         [0.04813589, 0.0417157 , 0.04213455, 0.04043828, 0.04785857],
         [0.05220826, 0.04546057, 0.04593386, 0.04521342, 0.04938158]]],


       [[[0.05527248, 0.05098452, 0.05117118, 0.05079096, 0.05524288],
         [0.0485605 , 0.04286669, 0.04385009, 0.04209748, 0.0494421 ],
         [0.0484607 , 0.04226095, 0.04225322, 0.04175927, 0.04863562],
         [0.048

---

## Training

In [13]:
TRAIN_SIZE = 1024 * 64
TEST_SIZE = 4096
BATCH_SIZE = 256

In [14]:
train_dataset = new_dataset(size=TRAIN_SIZE,
                            sampler=lambda: new_sample(board_size=input_size, num_blacks=10, num_whites=0),
                            labeler=detector
                            ).batch(BATCH_SIZE)
test_dataset = new_dataset(size=TEST_SIZE,
                           sampler=lambda: new_sample(board_size=input_size, num_blacks=10, num_whites=0),
                           labeler=detector
                           ).batch(BATCH_SIZE)

In [15]:
class BalancedMSE(tf.keras.losses.Loss):
    def __init__(self, ratio, *args, **kwargs):
        """
        upwards = ratio, downwards = 1 - ratio
        :param ratio: from the range [0, 1]. ratio>0.5 will favor upwards corrections
        """
        super().__init__(*args, **kwargs)
        self.ratio = ratio

    def call(self, y_label, y_model):

        difference = y_label - y_model

        # only derivations where label > model
        upwards = tf.nn.relu(self.ratio * difference)

        # only derivations wherer label < model
        downwards = tf.nn.relu((1.0 - self.ratio) * -difference)

        mse = tf.reduce_mean(tf.square(upwards + downwards))

        return mse

In [16]:
#loss_object = tf.keras.losses.MeanSquaredError()
loss_object = BalancedMSE(ratio=.9) # more weight to upwards corrections
optimizer = tf.keras.optimizers.Adam(learning_rate=.01)

In [17]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.RootMeanSquaredError(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.RootMeanSquaredError(name='test_accuracy')

In [18]:
model = SimpleConvQFunction(input_size, n_filters=4, n_layers=2)

In [19]:
@tf.function
def train_step(states, labels):
    with tf.GradientTape() as tape:
        predictions = model(states, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

In [20]:
@tf.function
def test_step(states, labels):
    predictions = model(states, training=False)
    t_loss = loss_object(labels, predictions)
    test_loss(t_loss)
    test_accuracy(labels, predictions)

In [24]:
EPOCHS = 20
for epoch in range(EPOCHS):
    for stats in [train_loss, train_accuracy, test_accuracy, test_loss]:
        stats.reset_states()

    for states, labels in train_dataset:
        train_step(states, labels)

    for states, labels in test_dataset:
        test_step(states, labels)

    print(
        f'Epoch {epoch+1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy.result()},     '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy.result()}'
    )



Epoch 1, Loss: 0.006255467887967825, Accuracy: 0.7112389802932739,     Test Loss: 0.006303819362074137, Test Accuracy: 0.7146481275558472
Epoch 2, Loss: 0.006255167070776224, Accuracy: 0.7111312747001648,     Test Loss: 0.006301793269813061, Test Accuracy: 0.7125576734542847
Epoch 3, Loss: 0.006255201064050198, Accuracy: 0.7111881971359253,     Test Loss: 0.006304567214101553, Test Accuracy: 0.7144740223884583
Epoch 4, Loss: 0.00625541852787137, Accuracy: 0.7111892104148865,     Test Loss: 0.006304538808763027, Test Accuracy: 0.7135357856750488
Epoch 5, Loss: 0.006255420856177807, Accuracy: 0.7112113237380981,     Test Loss: 0.006302868016064167, Test Accuracy: 0.7155792713165283
Epoch 6, Loss: 0.006255086977034807, Accuracy: 0.7111674547195435,     Test Loss: 0.006304854527115822, Test Accuracy: 0.7135363221168518
Epoch 7, Loss: 0.0062560453079640865, Accuracy: 0.7112904787063599,     Test Loss: 0.006303894799202681, Test Accuracy: 0.7182992100715637
Epoch 8, Loss: 0.00625520991161465

In [25]:
BATCH_SIZE=1

In [26]:
dataset = new_dataset(1, lambda: new_sample(input_size, 10, 0), detector).batch(BATCH_SIZE)

In [27]:
iterator  = iter(dataset)
states, labels = iterator.next()
labels.shape

TensorShape([1, 5, 5])

In [28]:
Gt.print_bin(states, True)

shape: (5, 5, 2)
[[1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 1.]
 [1. 0. 0. 1. 1.]
 [0. 0. 0. 0. 0.]
 [1. 0. 1. 1. 0.]]


In [29]:
tf.floor(labels+.01)

<tf.Tensor: shape=(1, 5, 5), dtype=float32, numpy=
array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]], dtype=float32)>

In [30]:
pred = model(states)
np.round(pred, 2)

array([[[[0.05, 0.73, 0.73, 0.74, 0.03],
         [0.72, 0.91, 0.91, 0.91, 0.72],
         [0.73, 0.91, 0.92, 0.91, 0.72],
         [0.73, 0.91, 0.91, 0.91, 0.71],
         [0.04, 0.7 , 0.7 , 0.69, 0.03]]]], dtype=float32)

In [8]:
diag1 = np.diag([1., 1., 1.])
zeros = np.zeros([3,3])
diag1 = np.stack([diag1, zeros], axis=0)
diag2 = np.diag([1., 1., 1.])[::-1,:]
diag2 = np.stack([diag2, zeros], axis=0)
hor = np.stack([zeros, zeros], axis=0)
hor[0, 1, :] = 1.
ver = np.stack([zeros, zeros], axis=0)
ver[0, :, 1] = 1.
filters = np.stack([ver, diag1, hor, diag2], axis=-1)
filters = np.rollaxis(filters, 0, 3)
filters.shape

(3, 3, 2, 4)

In [4]:
detector = tf.keras.layers.Conv2D(
    filters=4, kernel_size=(3, 3),
    kernel_initializer=tf.constant_initializer(filters),
    bias_initializer=tf.constant_initializer(-2),
    padding='same',
    activation=tf.nn.relu,
    input_shape=(input_size, input_size, 2))

combiner = tf.keras.layers.Conv2D(
    filters = 1, kernel_size=(1,1),
    kernel_initializer=tf.constant_initializer([1.,1.,1.,1.]),
    bias_initializer=tf.constant_initializer(0),
)

In [5]:
r, c = np.random.randint(0, 7, 2)
r, c

(5, 0)

In [16]:
def new_sample():
    sample = np.zeros([1, 7,7,2])
    sample.shape
    for n in range(20):
        row, col = np.random.randint(0, 7, 2)
        sample[0, row, col, BLACK] = 1
    for n in range(0):
        row, col = np.random.randint(0, 7, 2)
        sample[0, row, col, WHITE] = 1
    return sample

sample = new_sample()
sample.shape

(1, 7, 7, 2)

In [11]:
Gt.print_bin(sample)

shape: (7, 7, 2)
[[0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 1. 1. 1.]
 [0. 0. 0. 0. 1. 1. 1.]
 [1. 0. 1. 1. 1. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]]

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


## The heuristic detector finds lines of three

In [12]:
print(np.squeeze(combiner(detector(sample))))

NameError: name 'combiner' is not defined

In [24]:
from domoku.jupyter_tools import print_bin
sample = new_sample()
print_bin(np.squeeze(sample), True)
print()
print(np.squeeze(tf.reduce_max(detector(sample), axis=-1)))

shape: (7, 7, 2)
[[0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 1. 0.]
 [1. 1. 1. 0. 0. 0. 1.]
 [1. 1. 0. 1. 1. 1. 0.]
 [1. 0. 1. 1. 1. 1. 1.]
 [0. 1. 0. 0. 1. 0. 0.]
 [1. 1. 0. 0. 0. 1. 0.]]

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [1. 1. 0. 0. 0. 0. 0.]
 [1. 1. 0. 1. 1. 1. 0.]
 [0. 0. 1. 1. 1. 1. 0.]
 [0. 1. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]


## Train a QFunction to mimic the detector
Actually, then it's not a Q-Functions, but let's not be too rigourous...;-)

In [45]:
def new_dataset(size: int):

    samples = []
    labels = []
    for i in range(size):
        a_sample = new_sample()
        samples.append(a_sample)
        labels.append(np.squeeze(combiner(detector(a_sample))))
    return samples, labels
dataset = tf.data.Dataset.from_tensor_slices(new_dataset(100))

In [None]:
BATCH_SIZE=4

In [56]:
batch = dataset.batch(BATCH_SIZE)

In [57]:
iterator  = iter(batch)
states, labels = iterator.next()
labels.shape

TensorShape([4, 7, 7])

In [58]:
states.shape

TensorShape([4, 1, 7, 7, 2])

In [51]:
from notebooks.ml_basics_recap.models import SimpleConvQFunction

model = SimpleConvQFunction(input_size)

In [59]:
states

<tf.Tensor: shape=(4, 1, 7, 7, 2), dtype=float64, numpy=
array([[[[[1., 0.],
          [0., 0.],
          [0., 0.],
          [1., 0.],
          [1., 0.],
          [1., 0.],
          [1., 0.]],

         [[0., 0.],
          [1., 0.],
          [1., 0.],
          [0., 0.],
          [1., 0.],
          [1., 0.],
          [1., 0.]],

         [[1., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],

         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [1., 0.],
          [1., 0.]],

         [[0., 0.],
          [0., 0.],
          [1., 0.],
          [1., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],

         [[1., 0.],
          [1., 0.],
          [1., 0.],
          [0., 0.],
          [1., 0.],
          [1., 0.],
          [1., 0.]],

         [[1., 0.],
          [1., 0.],
          [0., 0.],
          [1., 0.],
          [

In [52]:
model(states)

<tf.Tensor: shape=(4, 1, 7, 7), dtype=float32, numpy=
array([[[[-3.24230222e-03,  2.09147157e-03, -1.56578720e-02,
           9.91900638e-03, -3.71978921e-03, -4.04301984e-03,
          -9.35408007e-03],
         [-2.80427188e-03, -1.96996443e-02, -2.42071832e-03,
           1.91839859e-02,  2.09264569e-02,  4.71479408e-02,
           1.66787673e-02],
         [-8.73465836e-03,  2.87201572e-02,  2.01078840e-02,
           1.73841696e-02,  1.44961001e-02,  2.08881851e-02,
          -1.03986077e-02],
         [ 2.48998851e-02,  7.00929947e-03, -1.88744888e-02,
          -1.46068046e-02,  1.67077724e-02,  3.71180894e-03,
          -2.16221437e-03],
         [-4.25449386e-03, -1.37350261e-02, -3.04296780e-02,
          -2.21043564e-02, -1.57598294e-02,  6.38778694e-03,
          -1.76124834e-02],
         [-1.03289373e-02, -1.84028223e-02,  3.04969624e-02,
           1.21606817e-03, -1.32810399e-02, -1.33228197e-03,
          -1.12406034e-02],
         [ 1.79370232e-02,  2.63438895e-02,  1

In [53]:
print_bin(np.squeeze(states)[0], True)

shape: (7, 7, 2)
[[1. 0. 0. 1. 1. 1. 1.]
 [0. 1. 1. 0. 1. 1. 1.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1.]
 [0. 0. 1. 1. 0. 0. 0.]
 [1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 0. 1. 0. 0. 0.]]

shape: (7, 7, 2)
[[1. 0. 1. 0. 1. 0. 1.]
 [1. 1. 0. 0. 1. 0. 1.]
 [0. 1. 0. 0. 1. 0. 1.]
 [0. 1. 0. 1. 1. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0.]
 [1. 1. 0. 0. 1. 1. 0.]
 [0. 1. 0. 1. 0. 0. 0.]]


In [54]:
print(np.squeeze(tf.reduce_max(detector(states), axis=-1)))

[[[0. 0. 0. 0. 1. 1. 0.]
  [0. 0. 0. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 1. 1. 0. 0. 1. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 1. 0. 1.]
  [0. 1. 0. 0. 1. 0. 1.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0.]
  [0. 1. 0. 0. 0. 0. 0.]
  [0. 0. 1. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 1.]
  [0. 1. 1. 0. 0. 1. 1.]
  [0. 1. 0. 0. 0. 1. 0.]
  [1. 1. 1. 0. 0. 1. 0.]
  [0. 0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
