In [1]:
import tensorflow as tf
import numpy as np

## Hyperparameters

In [2]:
num_neurons = 100
num_synapses_per_neuron = 10

seed = 42

## Model

In [362]:
np.random.seed(seed=seed)

# Indices of synaps weights in B
B_ind = tf.constant(
    np.array(
        list(
            zip(
                np.repeat(range(num_neurons), repeats=num_synapses_per_neuron),
                np.array([np.sort(np.random.choice(num_neurons, size=num_synapses_per_neuron, replace=False))
                          for n in range(num_neurons)]).flatten()
            )
        )
    ),
    dtype=tf.int64)

# Weight values
B_val = tf.Variable(np.random.normal(scale=0.5, size=num_neurons*num_synapses_per_neuron), dtype=tf.float32)

# Sparse tensor representing B
#B = tf.SparseTensor(indices=B_ind, values=B_val, dense_shape=[num_neurons, num_neurons])
#B = tf.sparse.reorder(B)

def model_forward_step(input_activations, B):
    logits = tf.sparse.sparse_dense_matmul(B, tf.reshape(input_activations, [-1, 1]))
    output_activations = tf.nn.relu(logits)
    return logits, output_activations


## Problem: XOR

In [107]:
truth_table = [([0, 0], 0), 
               ([0, 1], 1), 
               ([1, 0], 1), 
               ([1, 1], 0)]

# We will use the first two neurons as input, and the third as xor output.
# The remaining 97 will be used for computation

def get_activations(target, features=None, prev_activations=tf.zeros(num_neurons, dtype=tf.float32)):
    if features is None:
        features = prev_activations[0:2]
    return tf.cast(tf.concat([features, [target], prev_activations[3:]], axis=0), tf.float32)

def get_training_dataset():
    def gen():
        for input, output in truth_table:
            yield (input, output)
    return tf.data.Dataset.from_generator(gen, output_types=(tf.float32, tf.float32)).repeat()

## Training

In [363]:
def training_step(features, target):
    print("Features: {}".format(features))
    print("Target: {}".format(target))
    input_activations = get_activations(target, features)
    B = tf.SparseTensor(indices=B_ind, values=B_val, dense_shape=[num_neurons, num_neurons])
    with tf.GradientTape() as t:
        t.watch(B.values)
        
        losses = []
        for i in range(5):
            logits, output_activations = model_forward_step(input_activations, B)
            print("Step: {}, Pred: {}, Non-zero activations: {}, logit: {}".format(i, tf.nn.sigmoid(logits[2]), tf.math.count_nonzero(output_activations), logits[2]))
            losses.append(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits[2], labels=[target]))
            input_activations = get_activations(target, prev_activations=tf.reshape(output_activations, [-1]))

        loss = tf.reduce_sum(losses)
        grad = t.gradient(loss, B.values)
        print("Loss: {}".format(loss))
        print("Grad sum: {}".format(tf.reduce_sum(grad)))
        B_val.assign_add(- 0.5 * grad) # SGD
        
    return loss
    

# Training loop
#for example in get_training_dataset():
#    
#    loss = training_step(input_activations)
#    print(loss)

In [293]:
training_dataset = get_training_dataset()
training_dataset_iter = iter(training_dataset)

In [391]:
features, target = next(training_dataset_iter)
features, target

(<tf.Tensor: id=298534, shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>,
 <tf.Tensor: id=298535, shape=(), dtype=float32, numpy=0.0>)

In [392]:
#B = tf.SparseTensor(indices=B_ind, values=B_val, dense_shape=[num_neurons, num_neurons])
training_step(features, target)

Features: [1. 1.]
Target: 0.0
Step: 0, Pred: [0.5], Non-zero activations: 5, logit: [0.]
Step: 1, Pred: [0.5], Non-zero activations: 14, logit: [0.]
Step: 2, Pred: [0.08815614], Non-zero activations: 34, logit: [-2.3363593]
Step: 3, Pred: [0.07420036], Non-zero activations: 40, logit: [-2.5238888]
Step: 4, Pred: [0.01514301], Non-zero activations: 46, logit: [-4.1749573]
Loss: 1.5709370374679565
Grad sum: -0.871904730796814


<tf.Tensor: id=298800, shape=(), dtype=float32, numpy=1.570937>

In [89]:
B.values

<tf.Tensor: id=12, shape=(1000,), dtype=float32, numpy=
array([ 1.93379447e-01, -6.68143034e-01, -9.79925573e-01, -3.74534160e-01,
       -3.44063967e-01, -5.33090413e-01, -9.98212278e-01, -2.78611062e-03,
       -1.95733857e+00,  2.73118436e-01,  1.64354146e+00, -2.73914027e+00,
        3.91127884e-01, -7.23964751e-01,  1.56822372e-02,  1.02305651e+00,
       -6.44340277e-01,  1.90026844e+00, -1.34817708e+00,  1.16750979e+00,
        3.82653564e-01, -7.53862143e-01, -1.74277198e+00,  1.54032397e+00,
        1.12493253e+00,  1.18614542e+00,  4.09438521e-01, -2.53951699e-01,
       -5.53177238e-01, -2.76671767e-01,  4.58994001e-01, -1.27095175e+00,
       -1.88202441e-01,  4.18759972e-01, -1.03586853e-01,  9.70316231e-01,
       -3.78579736e-01, -2.13875175e-01,  2.20583543e-01,  1.13394395e-01,
        6.59531474e-01, -1.68384925e-01,  1.76651180e+00,  3.76965739e-02,
       -3.26012492e-01,  9.31583345e-02, -2.55354953e+00,  1.03860605e+00,
        8.98669183e-01, -5.10989189e-01, -3.

In [12]:
features, target = next(training_dataset_iter)

In [13]:
input_activations = get_activations(features, target)
input_activations

<tf.Tensor: id=95, shape=(100,), dtype=float32, numpy=
array([1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)>

In [129]:
with tf.GradientTape() as t:
    t.watch(B.values)
    
    output_activations = tf.sparse.sparse_dense_matmul(B, tf.reshape(input_activations, [-1, 1]))
    loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=output_activations[0], labels=[target])
    
    grad = t.gradient(loss, B.values)
    print(tf.reduce_sum(grad))
    #t.watch(B_val)
    #print(input_activations)
    #output_activations = model_forward_step(input_activations)
    #print(output_activations[10])
    #loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=output_activations[10], labels=[target])
    #grad = t.gradient(B_val, output_activations[10])

B_val.assign(B_val - 0.01*grad)

tf.Tensor(-0.45180523, shape=(), dtype=float32)


<tf.Variable 'UnreadVariable' shape=(1000,) dtype=float32, numpy=
array([ 1.97897494e-01, -6.68143034e-01, -9.79925573e-01, -3.74534160e-01,
       -3.44063967e-01, -5.33090413e-01, -9.98212278e-01, -2.78611062e-03,
       -1.95733857e+00,  2.73118436e-01,  1.64354146e+00, -2.73914027e+00,
        3.91127884e-01, -7.23964751e-01,  1.56822372e-02,  1.02305651e+00,
       -6.44340277e-01,  1.90026844e+00, -1.34817708e+00,  1.16750979e+00,
        3.82653564e-01, -7.53862143e-01, -1.74277198e+00,  1.54032397e+00,
        1.12493253e+00,  1.18614542e+00,  4.09438521e-01, -2.53951699e-01,
       -5.53177238e-01, -2.76671767e-01,  4.58994001e-01, -1.27095175e+00,
       -1.88202441e-01,  4.18759972e-01, -1.03586853e-01,  9.70316231e-01,
       -3.78579736e-01, -2.13875175e-01,  2.20583543e-01,  1.13394395e-01,
        6.59531474e-01, -1.68384925e-01,  1.76651180e+00,  3.76965739e-02,
       -3.26012492e-01,  9.31583345e-02, -2.55354953e+00,  1.03860605e+00,
        8.98669183e-01, -5.1098918

In [128]:
grad.reduce_sum()

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute 'reduce_sum'

In [60]:
mat = tf.Variable(tf.ones((2,2)))
mat

<tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
array([[1., 1.],
       [1., 1.]], dtype=float32)>

In [291]:
sparse_vals = tf.Variable([1.0, 1.0, 1.0, 1.0])
sparse_vals

<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>

In [292]:
sparse = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0], [1, 1]], values=sparse_vals, dense_shape=[2, 2])
sparse

<tensorflow.python.framework.sparse_tensor.SparseTensor at 0x7fb7425e2128>

In [280]:
with tf.GradientTape() as t:
    t.watch(sparse.values)
    res = tf.sparse.sparse_dense_matmul(sparse, [[1.0], [1.0]])
    print(res)
    print(t.gradient(res[0,0], sparse.values))

tf.Tensor(
[[2.]
 [2.]], shape=(2, 1), dtype=float32)
tf.Tensor([1. 1. 0. 0.], shape=(4,), dtype=float32)


In [287]:
sparse_vals

<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([2., 1., 1., 1.], dtype=float32)>

In [293]:
sparse._values

<tf.Tensor: id=139120, shape=(4,), dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>

In [62]:
with tf.GradientTape() as t:
    res = tf.matmul(mat, [[1.0], [1.0]])
    print(res)
    print(t.gradient(res[0,0], mat))

tf.Tensor(
[[2.]
 [2.]], shape=(2, 1), dtype=float32)
tf.Tensor(
[[1. 1.]
 [0. 0.]], shape=(2, 2), dtype=float32)


In [77]:
tf.sparse.to_dense(B)

<tf.Tensor: id=1332, shape=(100, 100), dtype=float32, numpy=
array([[ 0.19337945,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  1.1675098 ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [-2.6745672 ,  0.        ,  0.        , ...,  0.        ,
         0.28004533,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)>

In [82]:
B.indices

<tf.Tensor: id=11, shape=(1000, 2), dtype=int64, numpy=
array([[ 0,  0],
       [ 0, 10],
       [ 0, 22],
       ...,
       [99, 73],
       [99, 77],
       [99, 78]])>

In [247]:
np.array([np.sort(np.random.choice(num_neurons, size=num_synapses_per_neuron, replace=False))
                          for n in range(num_neurons)]).flatten()

array([22, 26, 32, 45, 52, 62, 64, 70, 84, 93,  3,  6, 17, 20, 26, 52, 55,
       67, 71, 94, 15, 21, 29, 42, 49, 51, 64, 76, 78, 81,  2, 11, 25, 29,
       49, 60, 66, 70, 71, 88, 11, 17, 25, 30, 35, 41, 43, 56, 79, 86, 10,
       15, 20, 25, 49, 50, 60, 73, 84, 96,  7, 18, 19, 21, 59, 63, 74, 82,
       94, 99, 13, 19, 42, 52, 62, 66, 70, 71, 73, 74, 11, 15, 20, 32, 34,
       40, 54, 83, 92, 98,  0, 13, 29, 43, 44, 64, 74, 76, 87, 94,  2, 19,
       48, 49, 58, 71, 76, 78, 85, 98,  7, 19, 35, 45, 53, 71, 72, 85, 88,
       93,  9, 11, 17, 24, 30, 36, 57, 68, 73, 97,  0,  9, 19, 27, 29, 61,
       62, 64, 77, 81,  4, 18, 31, 36, 44, 51, 80, 81, 87, 99,  0,  4, 18,
       20, 35, 40, 44, 50, 78, 94, 13, 22, 23, 29, 45, 48, 51, 81, 87, 93,
        4,  8, 19, 28, 42, 45, 46, 58, 95, 98,  9, 11, 13, 15, 22, 58, 64,
       72, 82, 89, 16, 20, 23, 37, 40, 45, 48, 69, 72, 96,  0,  2, 31, 36,
       37, 48, 50, 54, 85, 91,  8, 12, 26, 34, 35, 45, 48, 51, 55, 95,  7,
       20, 33, 51, 53, 64

In [153]:
get_activations(target, features)

<tf.Tensor: id=4183, shape=(100,), dtype=float32, numpy=
array([1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)>