In [1]:
import tensorflow as tf
from typing import Callable
import numpy as np

In [2]:
def pprint_sparse_tensor(st):
    """
    Print a multi-line, human-readable summary of a sparse tensor.

    The output lists the dense shape followed by one "index: value" line per
    stored entry, in the order the entries appear in the tensor.

    Args:
        st: object exposing `dense_shape`, `indices` and `values`, where
            `dense_shape` and every element yielded by iterating `indices`
            and `values` provide `.numpy().tolist()` (e.g. an eager
            tf.SparseTensor).

    Returns:
        None. The summary is written to stdout.
    """
    header = "<SparseTensor shape=%s \n values={" % (st.dense_shape.numpy().tolist(),)
    # Collect the entry lines and join once instead of growing a string in a loop.
    entry_lines = [
        "\n  %s: %s" % (index.numpy().tolist(), value.numpy().tolist())
        for index, value in zip(st.indices, st.values)
    ]
    print(header + "".join(entry_lines) + "}>")

In [3]:
def generate_selection_tensor(
    shape: tf.TensorShape,
    range_start: int,
    range_stop: int,
    dtype: str = "float32"
) -> tf.SparseTensor:
    r"""
    Function to produce (\mathbf{S}^{(\alpha)})^{T}

    Builds a sparse matrix of the given dense shape holding ones at the
    diagonal positions [i, i] for i in [range_start, range_stop) and returns
    its transpose.

    Args:
        shape: dense shape of the matrix before transposition.
        range_start: first diagonal index that receives a one (inclusive).
        range_stop: end of the diagonal index range (exclusive).
        dtype: dtype of the stored values.

    Returns:
        The transposed sparse selection matrix. When range_start equals
        range_stop the result is built from an all-zero dense matrix.
    """
    if range_start == range_stop:
        # Empty range: start from dense zeros rather than from an empty index list.
        return tf.sparse.transpose(tf.sparse.from_dense(tf.zeros(shape, dtype=dtype)))

    # One stored entry per selected node, placed on the diagonal.
    ones_indices = [[i, i] for i in range(range_start, range_stop)]

    return tf.sparse.transpose(tf.SparseTensor(
        indices=ones_indices,
        values=tf.ones(len(ones_indices), dtype=dtype),
        dense_shape=shape,
    ))

In [4]:
class FreePropModel:
    r"""
    Iterative forward pass over a single square node-to-node weight matrix.

    Nodes are indexed along the weight matrix axes as [inputs | outputs | hidden]:
    indices [0, input_shape) are input nodes,
    [input_shape, input_shape + output_shape) are output nodes, and the
    remaining indices up to weights.shape[0] are hidden nodes. Each group has
    its own activation function, applied through a sparse selection tensor.
    """

    def __init__(
        self,
        input_shape: int,
        output_shape: int,
        weights: tf.Variable,
        iterations: int,
        input_activation_fn: Callable[[tf.Tensor], tf.Tensor],
        output_activation_fn: Callable[[tf.Tensor], tf.Tensor],
        hidden_activation_fn: Callable[[tf.Tensor], tf.Tensor],
        dtype: str = "float32",
    ) -> None:
        """
        Store the configuration and precompute the three selection tensors.

        Args:
            input_shape: number of input nodes.
            output_shape: number of output nodes.
            weights: square (nodes, nodes) weight matrix, stored transposed
                (see the comment on self.weights).
            iterations: number of propagation steps performed by fit.
            input_activation_fn: activation applied to input nodes.
            output_activation_fn: activation applied to output nodes.
            hidden_activation_fn: activation applied to hidden nodes.
            dtype: dtype used for the node tensor and the selection tensors.

        Raises:
            ValueError: if weights is not a square rank-2 matrix, or if the
                input and output node counts are negative or do not fit
                into the weight matrix.
        """
        weights_shape = weights.shape
        if len(weights_shape) != 2 or weights_shape[0] != weights_shape[1]:
            raise ValueError(
                "weights must be a square rank-2 matrix, got shape %s" % (tuple(weights_shape),)
            )
        if input_shape < 0 or output_shape < 0 or input_shape + output_shape > weights_shape[0]:
            raise ValueError(
                "input_shape (%s) and output_shape (%s) must be non-negative and fit into "
                "the %s nodes of weights" % (input_shape, output_shape, weights_shape[0])
            )

        self.input_shape = input_shape
        self.output_shape = output_shape
        # (\mathbf{W})^{T}
        self.weights = weights
        # l
        self.iterations = iterations
        # a_{d}
        self.input_activation_fn = input_activation_fn
        # a_{r}
        self.output_activation_fn = output_activation_fn
        # a_{h}
        self.hidden_activation_fn = hidden_activation_fn
        self.dtype = dtype

        # \mathbf{S}^{(d)}: selects the input nodes [0, input_shape)
        self._input_selection_tensor = generate_selection_tensor(
            self.weights.shape,
            0,
            self.input_shape,
            dtype=self.dtype,
        )
        # \mathbf{S}^{(r)}: selects the output nodes that follow the inputs
        self._output_selection_tensor = generate_selection_tensor(
            self.weights.shape,
            self.input_shape,
            self.input_shape + self.output_shape,
            dtype=self.dtype,
        )

        # \mathbf{S}^{(h)}: selects every remaining node as hidden
        self._hidden_selection_tensor = generate_selection_tensor(
            self.weights.shape,
            self.input_shape + self.output_shape,
            self.weights.shape[0],
            dtype=self.dtype,
        )

    def fit(self, x):
        """
        Run the forward propagation for `self.iterations` steps.

        Despite the name, this method does not update the weights; it only
        computes the output node values for the given batch.

        Args:
            x: rank-2 tensor of shape (batch_size, input_shape).

        Returns:
            Tensor of shape (batch_size, output_shape) holding the output
            node values after the last iteration.
        """
        # \mathbf{N}: one row per sample, one column per node, inputs written first
        node_tensor = tf.zeros((x.shape[0], self.weights.shape[0]), dtype=self.dtype)
        node_tensor = tf.tensor_scatter_nd_update(
            node_tensor,
            self._generate_node_tensor_indicies_for_inputs(x.shape[0]),
            x,
        )
        # NOTE(review): the input activation is applied to the whole node tensor,
        # so non-input nodes start at input_activation_fn(0) rather than 0 unless
        # the activation maps 0 to 0 (e.g. identity) - confirm this is intended.
        node_tensor = self.input_activation_fn(node_tensor)
        # Work on (nodes, batch) so the weights can be applied from the left.
        node_tensor = tf.transpose(node_tensor)

        for _ in tf.range(self.iterations):
            # (\mathbf{T})^{T}
            T = tf.tensordot(self.weights, node_tensor, axes=[[1], [0]])
            # (\mathbf{T}^{(d)})^{T}
            T_d = tf.sparse.sparse_dense_matmul(self._input_selection_tensor, self.input_activation_fn(T))
            # (\mathbf{T}^{(r)})^{T}
            T_r = tf.sparse.sparse_dense_matmul(self._output_selection_tensor, self.output_activation_fn(T))
            # (\mathbf{T}^{(h)})^{T}
            T_h = tf.sparse.sparse_dense_matmul(self._hidden_selection_tensor, self.hidden_activation_fn(T))

            # The three selections cover disjoint node ranges, so the sum
            # reassembles the full node tensor.
            node_tensor = T_d + T_r + T_h

        # Back to (batch, nodes).
        node_tensor = tf.transpose(node_tensor)

        # return \mathbf{\hat{Y}}
        return node_tensor[:, self.input_shape:self.input_shape + self.output_shape]

    def _generate_node_tensor_indicies_for_inputs(self, batch_size):
        """
        Build the scatter indices that address the input columns of the node tensor.

        Args:
            batch_size: number of rows (samples) in the node tensor.

        Returns:
            Nested list of shape (batch_size, input_shape, 2) where entry
            [i][j] is the pair [i, j].
        """
        # only rank 2 inputs are allowed right now, meaning that tensor is in the form (batch_size, data)
        return [
            [[i, j] for j in range(self.input_shape)]
            for i in range(batch_size)
        ]

# Data initialization

In [6]:
# Ten samples with three features each, shared by every comparison below.
# Rows range from all-zero to values spanning several orders of magnitude.
data = tf.constant(
    [
        [1.0, 1.0, 1.0],
        [2.0, 2.0, 2.0],
        [3.0, 3.0, 3.0],
        [4.0, 4.0, 4.0],
        [5.0, 5.0, 5.0],
        [1.28, 1.29, 1.3],
        [3.0, 2.0, 1.0],
        [8.28282, 8.3, 4.1],
        [0.0, 0.0, 0.0],
        [32.0, 10000.0, 12323232.0],
    ],
    dtype=tf.float32,
)

# Neural network with only input and output layers

In [7]:
# Reference Keras model: a single Dense layer with identity activation.
baseline_model = tf.keras.models.Sequential(
    [tf.keras.layers.Dense(units=3, activation=tf.identity)]
)
baseline_model.build(input_shape=data.shape)
# Fix the parameters: a 3x3 matrix filled with 2 and a zero 3-vector.
baseline_model.set_weights(
    [
        np.full((3, 3), 2, dtype="float32"),
        np.zeros(3, dtype="float32"),
    ]
)

In [8]:
# Record the baseline forward pass, then differentiate after the tape is closed.
with tf.GradientTape() as tape:
    baseline_result = baseline_model(data)

baseline_gradient = tape.gradient(
    target=baseline_result,
    sources=baseline_model.trainable_variables,
)

In [9]:
# 6x6 connectivity: every node in 0-2 is connected to every node in 3-5 with
# weight 2. The dense matrix is transposed before it is handed to the model.
weights = tf.Variable(
    tf.transpose(
        tf.sparse.to_dense(
            tf.SparseTensor(
                indices=[[src, dst] for src in range(0, 3) for dst in range(3, 6)],
                values=tf.constant([2] * 9, dtype="float32"),
                dense_shape=(6, 6),
            )
        )
    )
)
# Three inputs, three outputs, one propagation step, identity everywhere.
free_prop_model = FreePropModel(
    input_shape=3,
    output_shape=3,
    weights=weights,
    iterations=1,
    input_activation_fn=tf.identity,
    output_activation_fn=tf.identity,
    hidden_activation_fn=tf.identity,
)

In [10]:
# Record the FreePropModel forward pass, then differentiate with respect to
# its weight matrix after the tape is closed.
with tf.GradientTape() as tape:
    free_prop_result = free_prop_model.fit(data)

free_prop_gradient = tape.gradient(
    target=free_prop_result,
    sources=free_prop_model.weights,
)

In [11]:
# Element-wise exact equality between the Keras baseline output and the
# FreePropModel output for the same data.
baseline_result == free_prop_result

<tf.Tensor: shape=(10, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [12]:
# Compare the baseline gradient of the first trainable variable (presumably the
# Dense kernel, given the matrix/vector order used in set_weights) with rows
# 0-2, columns 3-5 of the transposed free-prop weight gradient, i.e. the block
# that was populated when the 6x6 weights were built.
baseline_gradient[0] == tf.transpose(free_prop_gradient)[:3, 3:]

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

# Neural network with one hidden layer and sigmoid activation functions

In [124]:
# Reference Keras model: two Dense layers of three units, both with sigmoid.
baseline_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units=3, activation=tf.sigmoid),
        tf.keras.layers.Dense(units=3, activation=tf.sigmoid),
    ]
)
baseline_model.build(input_shape=data.shape)
# Per layer: a 3x3 matrix filled with 2 followed by a zero 3-vector.
baseline_model.set_weights(
    [
        np.full((3, 3), 2, dtype="float32"),
        np.zeros(3, dtype="float32"),
        np.full((3, 3), 2, dtype="float32"),
        np.zeros(3, dtype="float32"),
    ]
)

In [125]:
# Record the baseline forward pass, then differentiate after the tape is closed.
with tf.GradientTape() as tape:
    baseline_result = baseline_model(data)

baseline_gradient = tape.gradient(
    target=baseline_result,
    sources=baseline_model.trainable_variables,
)

In [126]:
# 9x9 connectivity with weight 2 on every listed edge:
#   nodes 0-2 (inputs)  -> nodes 6-8 (hidden)
#   nodes 6-8 (hidden)  -> nodes 3-5 (outputs)
# The sparse values are plain constants (as in the first example); only the
# final dense, transposed matrix needs to be a tf.Variable.
weights = tf.SparseTensor(
    indices=[
        *[[i, j] for i in range(3) for j in range(6, 9)],
        *[[i, j] for i in range(6, 9) for j in range(3, 6)],
    ],
    values=tf.constant([2] * 18, dtype="float32"),
    dense_shape=(9, 9),
)
weights = tf.Variable(tf.transpose(tf.sparse.to_dense(weights)))
# Three inputs, three outputs, two propagation steps; identity on the inputs,
# sigmoid on output and hidden nodes.
free_prop_model = FreePropModel(
    3,
    3,
    weights,
    2,
    tf.identity,
    tf.sigmoid,
    tf.sigmoid,
)

In [127]:
# Record the FreePropModel forward pass, then differentiate with respect to
# its weight matrix after the tape is closed.
with tf.GradientTape() as tape:
    free_prop_result = free_prop_model.fit(data)

free_prop_gradient = tape.gradient(
    target=free_prop_result,
    sources=free_prop_model.weights,
)

In [128]:
# Element-wise exact equality between the two-layer Keras baseline output and
# the FreePropModel output for the same data.
baseline_result == free_prop_result

<tf.Tensor: shape=(10, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [129]:
# Baseline gradient of the first trainable variable (presumably the first Dense
# kernel) against rows 0-2, columns 6-8 of the transposed free-prop gradient,
# i.e. the block populated for nodes 0-2 -> nodes 6-8.
baseline_gradient[0] == tf.transpose(free_prop_gradient)[:3, 6:]

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [133]:
# Difference (rather than ==) between the baseline gradient at index 2
# (presumably the second Dense kernel) and rows 6-8, columns 3-5 of the
# transposed free-prop gradient; the output below shows a residual of about
# 3.7e-09 per entry.
baseline_gradient[2] - tf.transpose(free_prop_gradient)[6:, 3:6]

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[3.7252903e-09, 3.7252903e-09, 3.7252903e-09],
       [3.7252903e-09, 3.7252903e-09, 3.7252903e-09],
       [3.7252903e-09, 3.7252903e-09, 3.7252903e-09]], dtype=float32)>

In [134]:
# Display the baseline side of the difference computed above.
baseline_gradient[2]

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227]], dtype=float32)>

In [135]:
# Display the free-prop side of the difference: rows 6-8, columns 3-5 of the
# transposed weight gradient.
tf.transpose(free_prop_gradient)[6:, 3:6]

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227],
       [0.0448227, 0.0448227, 0.0448227]], dtype=float32)>

# Complex case

Neural network with three hidden layers, a different activation function in the last layer, and 5 output neurons

In [64]:
# Reference Keras model: three sigmoid Dense layers of three units followed by
# a tanh Dense layer of five units.
baseline_model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Dense(units=3, activation=tf.sigmoid),
        tf.keras.layers.Dense(units=3, activation=tf.sigmoid),
        tf.keras.layers.Dense(units=3, activation=tf.sigmoid),
        tf.keras.layers.Dense(units=5, activation=tf.tanh),
    ]
)
baseline_model.build(input_shape=data.shape)
# Per layer: a matrix filled with 2 followed by a zero vector; the last layer
# uses a 3x5 matrix and a 5-vector.
baseline_model.set_weights(
    [
        np.full((3, 3), 2, dtype="float32"),
        np.zeros(3, dtype="float32"),
        np.full((3, 3), 2, dtype="float32"),
        np.zeros(3, dtype="float32"),
        np.full((3, 3), 2, dtype="float32"),
        np.zeros(3, dtype="float32"),
        np.full((3, 5), 2, dtype="float32"),
        np.zeros(5, dtype="float32"),
    ]
)

In [65]:
# 17x17 connectivity with weight 2 on every listed edge:
#   nodes 0-2   (inputs)   -> nodes 8-10  (hidden)
#   nodes 8-10  (hidden)   -> nodes 11-13 (hidden)
#   nodes 11-13 (hidden)   -> nodes 14-16 (hidden)
#   nodes 14-16 (hidden)   -> nodes 3-7   (outputs)
# The sparse values are plain constants (as in the first example); only the
# final dense, transposed matrix needs to be a tf.Variable.
weights = tf.SparseTensor(
    indices=[
        *[[i, j] for i in range(3) for j in range(8, 11)],
        *[[i, j] for i in range(8, 11) for j in range(11, 14)],
        *[[i, j] for i in range(11, 14) for j in range(14, 17)],
        *[[i, j] for i in range(14, 17) for j in range(3, 8)],
    ],
    values=tf.constant([2] * 42, dtype="float32"),
    dense_shape=(17, 17),
)
weights = tf.Variable(tf.transpose(tf.sparse.to_dense(weights)))
# Three inputs, five outputs, four propagation steps; identity on the inputs,
# tanh on the outputs, sigmoid on the hidden nodes.
free_prop_model = FreePropModel(
    3,
    5,
    weights,
    4,
    tf.identity,
    tf.tanh,
    tf.sigmoid,
)

In [66]:
# Record the baseline forward pass, then differentiate after the tape is closed.
with tf.GradientTape() as tape:
    baseline_result = baseline_model(data)

baseline_gradient = tape.gradient(
    target=baseline_result,
    sources=baseline_model.trainable_variables,
)

In [67]:
# Record the FreePropModel forward pass, then differentiate with respect to
# its weight matrix after the tape is closed.
with tf.GradientTape() as tape:
    free_prop_result = free_prop_model.fit(data)

free_prop_gradient = tape.gradient(
    target=free_prop_result,
    sources=free_prop_model.weights,
)

In [68]:
# Element-wise exact equality between the four-layer Keras baseline output and
# the FreePropModel output for the same data.
baseline_result == free_prop_result

<tf.Tensor: shape=(10, 5), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])>

In [81]:
# Baseline gradient at index 0 (presumably the first Dense kernel) against rows
# 0-2, columns 8-10 of the transposed free-prop gradient (nodes 0-2 -> 8-10).
baseline_gradient[0] == tf.transpose(free_prop_gradient)[:3, 8:11]

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [95]:
# Baseline gradient at index 2 (presumably the second Dense kernel) against
# rows 8-10, columns 11-13 of the transposed free-prop gradient
# (nodes 8-10 -> 11-13).
baseline_gradient[2] == tf.transpose(free_prop_gradient)[8:11, 11:14]

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [106]:
# Baseline gradient at index 4 (presumably the third Dense kernel) against rows
# 11-13, columns 14-16 of the transposed free-prop gradient
# (nodes 11-13 -> 14-16).
baseline_gradient[4] == tf.transpose(free_prop_gradient)[11:14, 14:]

<tf.Tensor: shape=(3, 3), dtype=bool, numpy=
array([[ True,  True,  True],
       [ True,  True,  True],
       [ True,  True,  True]])>

In [113]:
# Baseline gradient at index 6 (presumably the last Dense kernel, 3x5) against
# rows 14-16, columns 3-7 of the transposed free-prop gradient
# (nodes 14-16 -> output nodes 3-7).
baseline_gradient[6] == tf.transpose(free_prop_gradient)[14:, 3:8]

<tf.Tensor: shape=(3, 5), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])>