In [1]:
import tensorflow as tf
from typing import Callable
import numpy as np
from free_propagation_model import FreePropModel

# Data initialization

These input values are used to compare the outputs and gradients of the baseline model and the free-propagation model.

In [2]:
# Ten input rows with three features each: five constant rows (1..5), a row of
# close decimals, a descending row, mixed decimals, an all-zero row, and a row
# whose entries span several orders of magnitude.
data = tf.constant(
    [
        *[[value] * 3 for value in range(1, 6)],
        [1.28, 1.29, 1.3],
        [3, 2, 1],
        [8.28282, 8.3, 4.1],
        [0] * 3,
        [32, 10000, 12323232],
    ],
    dtype=tf.float32,
)

# Case 1

Neural network with only input and output layers and with identity as the activation function

In [3]:
# Reference network: a single 3 -> 3 Dense layer with an identity activation.
baseline_model = tf.keras.Sequential()
baseline_model.add(tf.keras.layers.Dense(3, activation=tf.identity))
baseline_model.build(input_shape=data.shape)

# Deterministic parameters: every kernel entry is 2 and every bias is 0.
baseline_model.set_weights([
    np.full((3, 3), 2, dtype="float32"),
    np.zeros(3, dtype="float32"),
])

In [4]:
# Record only the forward pass on the tape; the gradient itself is computed
# after leaving the context, which is the usual GradientTape pattern.
with tf.GradientTape() as tape:
    baseline_result = baseline_model(data)

# The target is non-scalar, so this is the gradient of the sum of all outputs
# with respect to each trainable variable of the baseline model.
baseline_gradient = tape.gradient(baseline_result, baseline_model.trainable_variables)

In [5]:
# 6 x 6 connection matrix: entries (i, j) with i in 0-2 and j in 3-5 are set to 2,
# everything else is 0. The dense matrix is transposed before being wrapped in
# the trainable variable that is handed to the model.
weights = tf.Variable(
    tf.transpose(
        tf.sparse.to_dense(
            tf.SparseTensor(
                indices=[[src, dst] for src in range(3) for dst in range(3, 6)],
                values=tf.constant([2] * 9, dtype="float32"),
                dense_shape=(6, 6),
            )
        )
    )
)

# NOTE(review): positional arguments are presumably (inputs, outputs, weights,
# propagation steps, then three activation functions) -- confirm against
# FreePropModel's signature.
free_prop_model = FreePropModel(3, 3, weights, 1, tf.identity, tf.identity, tf.identity)

In [6]:
# Record only the forward pass on the tape; the gradient itself is computed
# after leaving the context, which is the usual GradientTape pattern.
with tf.GradientTape() as tape:
    free_prop_result = free_prop_model.fit(data)

# The target is non-scalar, so this is the gradient of the sum of all outputs
# with respect to the model's weights.
free_prop_gradient = tape.gradient(free_prop_result, free_prop_model.weights)

In [7]:
# Elementwise exact comparison of the two forward-pass outputs.
tf.equal(baseline_result, free_prop_result)

<tf.Tensor: shape=(10, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [8]:
# Kernel gradient of the Dense layer vs. the matching 3 x 3 block (rows 0-2,
# columns 3-5) of the transposed free-propagation gradient.
tf.equal(
    baseline_gradient[0],
    tf.transpose(free_prop_gradient)[:3, 3:],
)

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

# Case 2

Neural network with one hidden layer and sigmoid activation functions

In [9]:
# Reference network: two stacked 3-unit Dense layers, both with sigmoid activations.
baseline_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(3, activation=tf.sigmoid) for _ in range(2)]
)
baseline_model.build(input_shape=data.shape)

# Same parameters for both layers: kernel filled with 2, bias filled with 0.
baseline_model.set_weights(
    [np.full((3, 3), 2, dtype="float32"), np.zeros(3, dtype="float32")] * 2
)

In [10]:
# Record only the forward pass on the tape; the gradient itself is computed
# after leaving the context, which is the usual GradientTape pattern.
with tf.GradientTape() as tape:
    baseline_result = baseline_model(data)

# The target is non-scalar, so this is the gradient of the sum of all outputs
# with respect to each trainable variable of the baseline model.
baseline_gradient = tape.gradient(baseline_result, baseline_model.trainable_variables)

In [11]:
# 9 x 9 connection matrix. Judging by the slices compared against the baseline
# gradients in this notebook, indices 0-2 are the inputs, 3-5 the outputs and
# 6-8 the hidden layer.
weights = tf.SparseTensor(
    indices=[
        *[[i, j] for i in range(3) for j in range(6, 9)],     # 0-2 -> 6-8
        *[[i, j] for i in range(6, 9) for j in range(3, 6)],  # 6-8 -> 3-5
    ],
    # A plain constant (as in Case 1) is enough here: these values are only used
    # to build the dense matrix, so no separate tf.Variable is needed.
    # 18 = two blocks of 3 x 3 connections.
    values=tf.constant([2] * 18, dtype="float32"),
    dense_shape=(9, 9),
)
# The trainable variable is the transposed dense form of the connection matrix.
weights = tf.Variable(tf.transpose(tf.sparse.to_dense(weights)))

# NOTE(review): positional arguments are presumably (inputs, outputs, weights,
# propagation steps, then three activation functions) -- confirm against
# FreePropModel's signature.
free_prop_model = FreePropModel(
    3,
    3,
    weights,
    2,
    tf.identity,
    tf.sigmoid,
    tf.sigmoid,
)

In [12]:
# Record only the forward pass on the tape; the gradient itself is computed
# after leaving the context, which is the usual GradientTape pattern.
with tf.GradientTape() as tape:
    free_prop_result = free_prop_model.fit(data)

# The target is non-scalar, so this is the gradient of the sum of all outputs
# with respect to the model's weights.
free_prop_gradient = tape.gradient(free_prop_result, free_prop_model.weights)

In [13]:
# Elementwise exact comparison of the two forward-pass outputs.
tf.equal(baseline_result, free_prop_result)

<tf.Tensor: shape=(10, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [14]:
# Kernel gradient of the first Dense layer vs. the matching 3 x 3 block
# (rows 0-2, columns 6-8) of the transposed free-propagation gradient.
tf.equal(
    baseline_gradient[0],
    tf.transpose(free_prop_gradient)[:3, 6:],
)

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

The gradients of the second layer are most likely the same as well; the tiny difference shown below can be explained by floating-point rounding error. The values of both gradients are displayed below, and they clearly should be equal.

In [15]:
# Elementwise difference between the second Dense kernel gradient and the
# matching block (rows 6-8, columns 3-5) of the transposed free-propagation gradient.
tf.subtract(
    baseline_gradient[2],
    tf.transpose(free_prop_gradient)[6:, 3:6],
)

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[3.7252903e-09, 3.7252903e-09, 3.7252903e-09],
       [3.7252903e-09, 3.7252903e-09, 3.7252903e-09],
       [3.7252903e-09, 3.7252903e-09, 3.7252903e-09]], dtype=float32)>

In [16]:
# Baseline side of the comparison: gradient at index 2 of the trainable variables.
baseline_gradient[2]

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227]], dtype=float32)>

In [17]:
# Free-propagation side of the comparison: rows 6-8, columns 3-5 of the transposed gradient.
tf.transpose(free_prop_gradient)[6:, 3:6]

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227]], dtype=float32)>

# Case 3

Neural network with three hidden layers (sigmoid), a different activation function (tanh) in the last layer, and 5 output neurons

In [18]:
# Reference network: three 3-unit sigmoid layers followed by a 5-unit tanh layer.
baseline_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(3, activation=tf.sigmoid) for _ in range(3)]
    + [tf.keras.layers.Dense(5, activation=tf.tanh)]
)
baseline_model.build(input_shape=data.shape)

# Every kernel is filled with 2 and every bias with 0; the last layer maps 3 -> 5.
baseline_model.set_weights([
    *([np.full((3, 3), 2, dtype="float32"), np.zeros(3, dtype="float32")] * 3),
    np.full((3, 5), 2, dtype="float32"),
    np.zeros(5, dtype="float32"),
])

In [19]:
# 17 x 17 connection matrix. Judging by the slices compared against the baseline
# gradients in this notebook, indices 0-2 are the inputs, 3-7 the five outputs,
# and 8-10, 11-13, 14-16 the three hidden layers.
weights = tf.SparseTensor(
    indices=[
        *[[i, j] for i in range(3) for j in range(8, 11)],        # 0-2   -> 8-10
        *[[i, j] for i in range(8, 11) for j in range(11, 14)],   # 8-10  -> 11-13
        *[[i, j] for i in range(11, 14) for j in range(14, 17)],  # 11-13 -> 14-16
        *[[i, j] for i in range(14, 17) for j in range(3, 8)],    # 14-16 -> 3-7
    ],
    # A plain constant (as in Case 1) is enough here: these values are only used
    # to build the dense matrix, so no separate tf.Variable is needed.
    # 42 = three blocks of 3 x 3 plus one block of 3 x 5 connections.
    values=tf.constant([2] * 42, dtype="float32"),
    dense_shape=(17, 17),
)
# The trainable variable is the transposed dense form of the connection matrix.
weights = tf.Variable(tf.transpose(tf.sparse.to_dense(weights)))

# NOTE(review): positional arguments are presumably (inputs, outputs, weights,
# propagation steps, then three activation functions) -- confirm against
# FreePropModel's signature.
free_prop_model = FreePropModel(
    3,
    5,
    weights,
    4,
    tf.identity,
    tf.tanh,
    tf.sigmoid,
)

In [20]:
# Record only the forward pass on the tape; the gradient itself is computed
# after leaving the context, which is the usual GradientTape pattern.
with tf.GradientTape() as tape:
    baseline_result = baseline_model(data)

# The target is non-scalar, so this is the gradient of the sum of all outputs
# with respect to each trainable variable of the baseline model.
baseline_gradient = tape.gradient(baseline_result, baseline_model.trainable_variables)

In [21]:
# Record only the forward pass on the tape; the gradient itself is computed
# after leaving the context, which is the usual GradientTape pattern.
with tf.GradientTape() as tape:
    free_prop_result = free_prop_model.fit(data)

# The target is non-scalar, so this is the gradient of the sum of all outputs
# with respect to the model's weights.
free_prop_gradient = tape.gradient(free_prop_result, free_prop_model.weights)

In [22]:
# Elementwise exact comparison of the two forward-pass outputs.
tf.equal(baseline_result, free_prop_result)

<tf.Tensor: shape=(10, 5), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])>

In [23]:
# First Dense kernel gradient vs. rows 0-2, columns 8-10 of the transposed
# free-propagation gradient.
tf.equal(
    baseline_gradient[0],
    tf.transpose(free_prop_gradient)[:3, 8:11],
)

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [24]:
# Second Dense kernel gradient vs. rows 8-10, columns 11-13 of the transposed
# free-propagation gradient.
tf.equal(
    baseline_gradient[2],
    tf.transpose(free_prop_gradient)[8:11, 11:14],
)

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [25]:
# Third Dense kernel gradient vs. rows 11-13, columns 14-16 of the transposed
# free-propagation gradient.
tf.equal(
    baseline_gradient[4],
    tf.transpose(free_prop_gradient)[11:14, 14:],
)

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [26]:
# Output-layer (3 x 5) kernel gradient vs. rows 14-16, columns 3-7 of the
# transposed free-propagation gradient.
tf.equal(
    baseline_gradient[6],
    tf.transpose(free_prop_gradient)[14:, 3:8],
)

<tf.Tensor: shape=(3, 5), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])>