## Setup

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

# Reset Keras' global state first so that re-running the notebook from the top
# starts from a clean slate.
tf.keras.backend.clear_session()  # For easy reset of notebook state.
# Use float32 as the default Keras float type for everything defined below.
tf.keras.backend.set_floatx('float32')

## The Layer class
### Layers encapsulate a state (weights) and some computation
The main data structure you'll work with is the `Layer`. A layer encapsulates both a state (the layer's "weights") and a transformation from inputs to outputs (a "call", the layer's forward pass).

Here's a densely-connected layer. It has a state: the variables `w` and `b`.

In [8]:
from tensorflow.keras import layers


class Linear(layers.Layer):
    """Densely-connected layer computing ``inputs @ w + b``.

    Both variables are created eagerly in the constructor, so the input
    size has to be known up front.

    Args:
        units: Size of the output's last dimension.
        input_dim: Size of the input's last dimension.
    """

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        # Kernel: drawn from a random normal initializer.
        kernel_values = tf.random_normal_initializer()(
            shape=(input_dim, units), dtype='float32')
        self.w = tf.Variable(initial_value=kernel_values, trainable=True)
        # Bias: starts at zero.
        bias_values = tf.zeros_initializer()(shape=(units,), dtype='float32')
        self.b = tf.Variable(initial_value=bias_values, trainable=True)

    def call(self, inputs):
        """Apply the affine transformation to ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
    
# Run a batch of two all-ones rows (2 features each) through a 4-unit layer.
x = tf.ones(shape=(2, 2))
linear_layer = Linear(units=4, input_dim=2)
y = linear_layer(x)
print(y)

tf.Tensor(
[[-0.01793753 -0.06586551  0.09978694 -0.11993801]
 [-0.01793753 -0.06586551  0.09978694 -0.11993801]], shape=(2, 4), dtype=float32)


Note that the weights `w` and `b` are automatically tracked by the layer upon being set as layer attributes:

In [9]:
# The layer should report exactly the two variables assigned as attributes,
# kernel first and bias second.
expected_weights = [linear_layer.w, linear_layer.b]
assert linear_layer.weights == expected_weights

Note that you also have access to a quicker shortcut for adding weights to a layer: the `add_weight` method:

In [10]:
class Linear(layers.Layer):
    """Densely-connected layer computing ``inputs @ w + b``.

    Same layer as before, but the variables are registered through
    ``add_weight`` instead of being built by hand with ``tf.Variable``.

    Args:
        units: Size of the output's last dimension.
        input_dim: Size of the input's last dimension.
    """

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        self.w = self.add_weight(
            shape=kernel_shape, initializer='random_normal', trainable=True)
        self.b = self.add_weight(
            shape=bias_shape, initializer='zeros', trainable=True)

    def call(self, inputs):
        """Apply the affine transformation to ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
    
# Same smoke test as before, now against the add_weight-based layer.
x = tf.ones(shape=(2, 2))
linear_layer = Linear(units=4, input_dim=2)
y = linear_layer(x)
print(y)

tf.Tensor(
[[-0.03158295 -0.10543805 -0.02984916 -0.02694507]
 [-0.03158295 -0.10543805 -0.02984916 -0.02694507]], shape=(2, 4), dtype=float32)


#### Layers can have non-trainable weights
Besides trainable weights, you can add non-trainable weights to a layer as well. Such weights are meant not to be taken into account during backpropagation, when you are training the layer.

Here's how to add and use a non-trainable weight:

In [24]:
class ComputeSum(layers.Layer):
    """Layer that keeps a running sum of its inputs in a non-trainable variable.

    Args:
        input_dim: Length of the accumulator vector ``total``.
    """

    def __init__(self, input_dim):
        super(ComputeSum, self).__init__()
        start_value = tf.zeros((input_dim,))
        # trainable=False: this is state, not a parameter to be optimised.
        self.total = tf.Variable(initial_value=start_value, trainable=False)

    def call(self, inputs):
        """Add the sum of ``inputs`` over axis 0 to ``total`` and return it."""
        batch_sum = tf.reduce_sum(inputs, axis=0)
        self.total.assign_add(batch_sum)
        return self.total
    
x = tf.ones(shape=(2, 2))
my_sum = ComputeSum(input_dim=2)
# Call the layer twice on the same batch: the accumulator grows on every call.
for call_number in range(2):
    y = my_sum(x)
    print(y.numpy())

[2. 2.]
[4. 4.]


It's part of `layer.weights`, but it gets categorized as a non-trainable weight:

In [27]:
all_weights = my_sum.weights
frozen_weights = my_sum.non_trainable_weights
print('weights:', len(all_weights))
print('non-trainable weights:', len(frozen_weights))

# The accumulator does not appear among the trainable weights:
print('trainable_weights:', my_sum.trainable_weights)

weights: 1
non-trainable weights: 1
trainable_weights: []


### Best practice: deferring weight creation until the shape of the inputs is known

In the `Linear` layer example above, the layer took an `input_dim` argument that was used to compute the shape of the weights `w` and `b` in `__init__`:

In [28]:
class Linear(layers.Layer):
    """Constructor of the eager ``Linear`` layer, repeated for reference.

    The weight shapes are derived from ``input_dim`` inside ``__init__``,
    so the input size must be supplied when the layer is instantiated.

    Args:
        units: Size of the output's last dimension.
        input_dim: Size of the input's last dimension.
    """

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        self.w = self.add_weight(
            shape=kernel_shape, initializer='random_normal', trainable=True)
        self.b = self.add_weight(
            shape=bias_shape, initializer='zeros', trainable=True)

In many cases, you may not know in advance the size of your inputs, and you would like to lazily create weights when that value becomes known, some time after instantiating the layer.

In the Keras API, we recommend creating layer weights in the `build(input_shape)` method of your layer. Like this:

In [29]:
class Linear(layers.Layer):
    """Densely-connected layer whose weights are created lazily.

    Only the output size is given at construction time. The kernel and bias
    are created in ``build``, once the shape of the inputs is known.

    Args:
        units: Size of the output's last dimension.
    """

    def __init__(self, units=32):
        super(Linear, self).__init__()
        self.units = units

    def build(self, input_shape):
        """Create ``w`` and ``b`` from the last dimension of ``input_shape``.

        Args:
            input_shape: Shape of the inputs the layer is called on; only
                its last entry is used.
        """
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        # The bias starts at zero, matching the other Linear variants in this
        # notebook; only the kernel is randomly initialised.
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='zeros',
                                 trainable=True)

    def call(self, inputs):
        """Return ``inputs @ w + b``."""
        return tf.matmul(inputs, self.w) + self.b

The `__call__` method of your layer will automatically run `build` the first time it is called. You now have a layer that's lazy and easy to use:

In [53]:
x = tf.ones(shape=(5, 3))

# Nothing about the input size is known when the layer is instantiated...
linear_layer = Linear(units=32)
# ...the weights are created on this first call, from the shape of `x`.
y = linear_layer(x)
y.shape

TensorShape([5, 32])

### Layers are recursively composable
If you assign a Layer instance as an attribute of another Layer, the outer layer will start tracking the weights of the inner layer.

We recommend creating such sublayers in the `__init__` method (since the sublayers will typically have a `build` method, they will be built when the outer layer gets built).

In [54]:
# Let's assume we are reusing the Linear class
# with a 'build' method that we defined above.

class MLPBlock(layers.Layer):
    """Three stacked ``Linear`` layers with ReLU after the first two.

    The sublayers are stored as attributes of this layer and are applied
    in the order ``linear_1`` -> ``linear_2`` -> ``linear_3``.
    """

    def __init__(self):
        super(MLPBlock, self).__init__()
        self.linear_1 = Linear(32)
        self.linear_2 = Linear(32)
        self.linear_3 = Linear(1)

    def call(self, inputs):
        """Run ``inputs`` through the three sublayers and return the result."""
        hidden = tf.nn.relu(self.linear_1(inputs))
        hidden = tf.nn.relu(self.linear_2(hidden))
        return self.linear_3(hidden)
    
    
mlp = MLPBlock()
# The first call to the block is what creates the sublayers' weights.
y = mlp(tf.ones((3, 64)))
for label, tracked in (('weights:', mlp.weights),
                       ('trainable weights:', mlp.trainable_weights)):
    print(label, len(tracked))

weights: 6
trainable weights: 6


In [84]:
# Show the shape of every weight tracked by the block, in tracking order.
for weight in mlp.weights:
    print(weight.shape)

(64, 32)
(32,)
(32, 32)
(32,)
(32, 1)
(1,)
