In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# Build a zeros initializer, then call it with a shape to materialise a (2, 3) tensor.
zero_initializer = tf.zeros_initializer()
tensor_1 = zero_initializer(shape=(2, 3))
# Bare last expression so the notebook displays the tensor.
tensor_1

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 0., 0.]], dtype=float32)>

In [3]:
# Same pattern with a random-normal initializer, this time for a (4, 5) tensor.
# NOTE(review): no seed is passed, so the values displayed below presumably
# change on every run - confirm, and pass `seed=` if reproducibility matters.
random_normal_initializer = tf.random_normal_initializer()
tensor_2 = random_normal_initializer(shape=(4, 5))
tensor_2

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 0.00424241,  0.04777312,  0.08389214, -0.01690874, -0.0275795 ],
       [ 0.01442562, -0.04790157, -0.07017928,  0.0403901 , -0.05316466],
       [ 0.01912783, -0.05242811, -0.04191307, -0.00206123, -0.00726662],
       [-0.10127562, -0.0436131 ,  0.04286411, -0.01425279, -0.04834786]],
      dtype=float32)>

# Custom Layers

In [10]:
# Custom Layer
class SimpleLinear(layers.Layer):
  """Linear layer whose weights are created eagerly in ``__init__``.

  The forward pass computes ``tf.matmul(input_tensor, w) + b``.

  Args:
    input_dim: Number of rows of ``w``; must match the last axis of the
      tensors passed to the layer for the matmul to be valid.
    units: Number of columns of ``w`` and length of ``b``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, input_dim=16, units=8, **kwargs):
    # Zero-argument super() does not look the class up by name, so it keeps
    # working when this notebook redefines ``SimpleLinear`` in later cells.
    super().__init__(**kwargs)

    # Weight matrix: random-normal values, shape (input_dim, units).
    w_init = tf.random_normal_initializer()
    self.w = tf.Variable(
        initial_value=w_init(shape=(input_dim, units), dtype='float32'),
        trainable=True)

    # Bias vector: zeros. The trailing comma matters: ``(units)`` is just
    # the int ``units``, whereas ``(units,)`` is a real 1-element shape tuple.
    b_init = tf.zeros_initializer()
    self.b = tf.Variable(
        initial_value=b_init(shape=(units,), dtype='float32'),
        trainable=True)

  def call(self, input_tensor):
    """Return ``input_tensor @ w + b``."""
    return tf.matmul(input_tensor, self.w) + self.b

In [11]:
x = tf.ones((4, 3))
x

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]], dtype=float32)>

In [12]:
# input_dim=3 matches the last axis of x (shape (4, 3)); units=5 yields a (4, 5) output.
# Every row of x is identical (all ones), so every row of y is identical too.
layer1 = SimpleLinear(input_dim=3, units=5)
y = layer1(x)
print(y)

tf.Tensor(
[[-0.09219853 -0.17905241  0.05790117  0.01243253 -0.14067876]
 [-0.09219853 -0.17905241  0.05790117  0.01243253 -0.14067876]
 [-0.09219853 -0.17905241  0.05790117  0.01243253 -0.14067876]
 [-0.09219853 -0.17905241  0.05790117  0.01243253 -0.14067876]], shape=(4, 5), dtype=float32)


In [13]:
layer1.weights # All weights

[<tf.Variable 'Variable:0' shape=(3, 5) dtype=float32, numpy=
 array([[-0.04613694, -0.12505102,  0.09280653, -0.03614628, -0.04756309],
        [-0.02660743,  0.01634117, -0.07319044,  0.04588107, -0.04395161],
        [-0.01945416, -0.07034256,  0.03828509,  0.00269773, -0.04916407]],
       dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>]

In [14]:
layer1.trainable_variables # Trainable weights

[<tf.Variable 'Variable:0' shape=(3, 5) dtype=float32, numpy=
 array([[-0.04613694, -0.12505102,  0.09280653, -0.03614628, -0.04756309],
        [-0.02660743,  0.01634117, -0.07319044,  0.04588107, -0.04395161],
        [-0.01945416, -0.07034256,  0.03828509,  0.00269773, -0.04916407]],
       dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(5,) dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>]

In [15]:
layer1.non_trainable_variables

[]

In [16]:
layer1.losses

[]

In [20]:
# Better way to initialize weights

class SimpleLinear(layers.Layer):
  """Linear layer that registers its weights through ``add_weight``.

  The forward pass computes ``tf.matmul(input_tensor, w) + b``. Both ``w``
  and ``b`` are initialised to ones; ``b`` is created as non-trainable.

  Args:
    input_dim: Number of rows of ``w``; must match the last axis of the
      tensors passed to the layer for the matmul to be valid.
    units: Number of columns of ``w`` and length of ``b``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, input_dim=16, units=8, **kwargs):
    # Zero-argument super() does not look the class up by name, so it keeps
    # working when this notebook redefines ``SimpleLinear`` in other cells.
    super().__init__(**kwargs)

    self.w = self.add_weight(shape=(input_dim, units),
                             initializer='ones',
                             trainable=True)

    # The trailing comma matters: ``(units)`` is just the int ``units``,
    # whereas ``(units,)`` is a real 1-element shape tuple.
    # trainable=False: the value won't be updated during backpropagation.
    self.b = self.add_weight(shape=(units,),
                             initializer='ones',
                             trainable=False)

  def call(self, input_tensor):
    """Return ``input_tensor @ w + b``."""
    return tf.matmul(input_tensor, self.w) + self.b

In [21]:
# Rebind x to a (4, 3) tensor drawn from a random-uniform initializer.
# random_uniform_initializer is reused by later cells to create new inputs.
random_uniform_initializer = tf.random_uniform_initializer()
x = random_uniform_initializer(shape=(4, 3))
x

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[ 0.01496513,  0.00303781,  0.01837255],
       [-0.03730161,  0.04084904,  0.02536943],
       [ 0.02333689,  0.03424232, -0.03479265],
       [-0.04310625,  0.00025379,  0.02537444]], dtype=float32)>

In [22]:
# Take input_dim from x's second axis instead of hard-coding it.
# This SimpleLinear initialises w and b to ones, so each output row is
# (sum of that row of x) + 1, repeated across the 4 units.
layer2 = SimpleLinear(input_dim=x.shape[1], units=4)
y = layer2(x)
y

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[1.0363755 , 1.0363755 , 1.0363755 , 1.0363755 ],
       [1.0289168 , 1.0289168 , 1.0289168 , 1.0289168 ],
       [1.0227866 , 1.0227866 , 1.0227866 , 1.0227866 ],
       [0.98252195, 0.98252195, 0.98252195, 0.98252195]], dtype=float32)>

In [23]:
layer2.weights

[<tf.Variable 'Variable:0' shape=(3, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>]

In [24]:
layer2.trainable_variables

[<tf.Variable 'Variable:0' shape=(3, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>]

In [25]:
layer2.non_trainable_variables

[<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>]

In [26]:
# Instead of initializing weights in the beginning, they can be initialized based on the shape of input data during forward pass of first batch
class SimpleLinear(layers.Layer):
  """Linear layer that defers weight creation to ``build``.

  Only the output width is given up front; the input width is read from
  the last axis of the shape passed to ``build``. The forward pass
  computes ``tf.matmul(input_tensor, w) + b``.

  Args:
    units: Number of columns of ``w`` and length of ``b``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, units=8, **kwargs):
    # Zero-argument super() does not look the class up by name, so it keeps
    # working when this notebook redefines ``SimpleLinear`` in other cells.
    super().__init__(**kwargs)
    self.units = units

  def build(self, input_shape):
    """Create ``w`` (random normal) and ``b`` (ones) for the given input shape."""
    # During the forward pass, the layer will automatically call the build()
    # method to determine input shape and initialize weights.
    self.w = self.add_weight(shape=(input_shape[-1], self.units),
                             initializer='random_normal',
                             trainable=True)

    # The trailing comma matters: ``(self.units)`` is just an int, whereas
    # ``(self.units,)`` is a real 1-element shape tuple.
    self.b = self.add_weight(shape=(self.units,),
                             initializer='ones',
                             trainable=True)

  def call(self, input_tensor):
    """Return ``input_tensor @ w + b``."""
    return tf.matmul(input_tensor, self.w) + self.b

In [27]:
layer3 = SimpleLinear(units=3)
y = layer3(x) # Shape of input is automatically inferred and weights initialized accordingly
y

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[0.99918425, 0.99952656, 1.000024  ],
       [0.9961427 , 0.99773246, 1.0008912 ],
       [1.0034626 , 0.9953954 , 1.0003178 ],
       [0.9961837 , 1.001798  , 1.0002899 ]], dtype=float32)>

In [28]:
# Combining multiple layers into a composite layer
class SimpleLinearBlock(layers.Layer):
  """Composite layer: three ``SimpleLinear`` layers with ReLU between them.

  Args:
    block_1_units: Width passed to the first ``SimpleLinear``.
    block_2_units: Width passed to the second ``SimpleLinear``.
    block_3_units: Width passed to the third ``SimpleLinear`` (output width).
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, block_1_units=2,
               block_2_units=4, block_3_units=8, **kwargs):
    super().__init__(**kwargs)

    # Sub-layers are stored as attributes, in the order they are applied.
    self.linear_1 = SimpleLinear(block_1_units)
    self.linear_2 = SimpleLinear(block_2_units)
    self.linear_3 = SimpleLinear(block_3_units)

  def call(self, inputs):
    """Run linear_1 -> ReLU -> linear_2 -> ReLU -> linear_3."""
    hidden = tf.nn.relu(self.linear_1(inputs))
    hidden = tf.nn.relu(self.linear_2(hidden))
    # No activation after the last linear layer.
    return self.linear_3(hidden)

In [29]:
x = random_uniform_initializer((4, 5))
x

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 0.01877796,  0.0436945 , -0.01119548,  0.00883502,  0.00882214],
       [ 0.04101385,  0.0341553 ,  0.0102047 ,  0.00952556, -0.0070714 ],
       [-0.01743801, -0.02397997, -0.02143389,  0.02523614,  0.04032494],
       [ 0.0400025 ,  0.03047463, -0.03704749, -0.04350666,  0.00772578]],
      dtype=float32)>

In [30]:
# Default block widths are (2, 4, 8), so the (4, 5) input comes out as (4, 8).
simple_linear_block = SimpleLinearBlock()
y = simple_linear_block(x)
y

<tf.Tensor: shape=(4, 8), dtype=float32, numpy=
array([[0.93721753, 1.0604069 , 1.0220245 , 0.9019017 , 0.9754738 ,
        1.0986675 , 0.894343  , 1.1307617 ],
       [0.9372249 , 1.0603839 , 1.0220094 , 0.90197957, 0.9754768 ,
        1.0986347 , 0.89436275, 1.1306885 ],
       [0.9372059 , 1.0604246 , 1.022039  , 0.9017903 , 0.97547865,
        1.0987214 , 0.8942896 , 1.1308718 ],
       [0.9372228 , 1.0604107 , 1.0220239 , 0.9019457 , 0.9754655 ,
        1.0986414 , 0.89438176, 1.1307147 ]], dtype=float32)>

In [31]:
simple_linear_block.weights

[<tf.Variable 'simple_linear_block/simple_linear_6/Variable:0' shape=(5, 2) dtype=float32, numpy=
 array([[ 0.09758361,  0.07517762],
        [ 0.00920575, -0.04373526],
        [ 0.07555021,  0.05549886],
        [-0.08100348,  0.07879134],
        [-0.05935478, -0.058478  ]], dtype=float32)>,
 <tf.Variable 'simple_linear_block/simple_linear_6/Variable:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>,
 <tf.Variable 'simple_linear_block/simple_linear_7/Variable:0' shape=(2, 4) dtype=float32, numpy=
 array([[-0.06515337, -0.01697101, -0.05418686, -0.09895294],
        [ 0.022919  ,  0.01554918,  0.03477591, -0.0580777 ]],
       dtype=float32)>,
 <tf.Variable 'simple_linear_block/simple_linear_7/Variable:0' shape=(4,) dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>,
 <tf.Variable 'simple_linear_block/simple_linear_8/Variable:0' shape=(4, 8) dtype=float32, numpy=
 array([[ 0.08351999, -0.05494576, -0.01653243, -0.06503007,  0.062482  ,
          0.03559432,

In [32]:
# Custom layer to calculate loss
class RegularizationLoss(layers.Layer):
  """Pass-through layer that records ``rate * sum(inputs)`` via ``add_loss``.

  Args:
    rate: Multiplier applied to the sum of the input tensor.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, rate=1e-3, **kwargs):
    super().__init__(**kwargs)
    self.rate = rate

  def call(self, input_tensor):
    """Register the loss term and return ``input_tensor`` untouched."""
    total = tf.reduce_sum(input_tensor)
    self.add_loss(self.rate * total)
    return input_tensor

In [33]:
reg_loss_layer = RegularizationLoss()
y = reg_loss_layer(x) # Inputs were not transformed. Only loss was calculated
y

<tf.Tensor: shape=(4, 5), dtype=float32, numpy=
array([[ 0.01877796,  0.0436945 , -0.01119548,  0.00883502,  0.00882214],
       [ 0.04101385,  0.0341553 ,  0.0102047 ,  0.00952556, -0.0070714 ],
       [-0.01743801, -0.02397997, -0.02143389,  0.02523614,  0.04032494],
       [ 0.0400025 ,  0.03047463, -0.03704749, -0.04350666,  0.00772578]],
      dtype=float32)>

In [34]:
reg_loss_layer.losses

[<tf.Tensor: shape=(), dtype=float32, numpy=0.0001571201>]

In [35]:
class SimpleLinearRegularized(layers.Layer):
  """Linear layer whose output is passed through a ``RegularizationLoss``.

  The forward pass computes ``tf.matmul(input_tensor, w) + b`` and hands the
  result to an inner ``RegularizationLoss(1e-2)`` layer, which records a loss
  term and returns the tensor unchanged.

  Args:
    units: Number of columns of ``w`` and length of ``b``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, units=8, **kwargs):
    super().__init__(**kwargs)
    self.units = units
    self.reg = RegularizationLoss(1e-2)

  def build(self, input_shape):
    """Create ``w`` (random normal) and ``b`` (ones) for the given input shape."""
    self.w = self.add_weight(shape=(input_shape[-1], self.units),
                             initializer='random_normal',
                             trainable=True)

    # The trailing comma matters: ``(self.units)`` is just an int, whereas
    # ``(self.units,)`` is a real 1-element shape tuple.
    self.b = self.add_weight(shape=(self.units,),
                             initializer='ones',
                             trainable=True)

  def call(self, input_tensor):
    """Return the linear output after registering the regularization loss."""
    output = tf.matmul(input_tensor, self.w) + self.b
    return self.reg(output)  # Calculates loss but does not change the tensor

In [36]:
# The (4, 5) input is projected to 3 units; the returned tensor is the plain
# linear output, while the regularization term is recorded on layer4.losses.
layer4 = SimpleLinearRegularized(units=3)
y = layer4(x)
y

<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
array([[1.0014913 , 1.0033051 , 1.0015633 ],
       [1.0038345 , 1.0016115 , 0.99912006],
       [0.9936041 , 0.9996777 , 1.0057316 ],
       [1.0041064 , 1.0037521 , 0.9997213 ]], dtype=float32)>

In [37]:
layer4.losses

[<tf.Tensor: shape=(), dtype=float32, numpy=0.12017519>]

In [38]:
class DenseRegularized(layers.Layer):
  """``Dense`` layer with an L2 kernel regularizer plus a ``RegularizationLoss``.

  Args:
    units: Width of the inner ``layers.Dense``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, units=8, **kwargs):
    super().__init__(**kwargs)

    l2_penalty = tf.keras.regularizers.l2(1e-3)
    self.dense = layers.Dense(units, kernel_regularizer=l2_penalty)
    self.reg = RegularizationLoss(1e-2)

  def call(self, input_tensor):
    """Apply the dense layer, then pass its output through ``self.reg``."""
    dense_output = self.dense(input_tensor)
    return self.reg(dense_output)

In [39]:
# Default units=8, so the (4, 5) input produces a (4, 8) output.
layer5 = DenseRegularized()
y = layer5(x)
y

<tf.Tensor: shape=(4, 8), dtype=float32, numpy=
array([[-0.00030002,  0.00669091, -0.00423164,  0.01630089,  0.0133537 ,
        -0.03279892, -0.02855805, -0.02391658],
       [-0.00351323, -0.00805707,  0.02733838,  0.0211803 , -0.01432209,
        -0.01817166, -0.01657871, -0.01982519],
       [-0.00046267,  0.05687127, -0.01783173,  0.00387103,  0.00541326,
        -0.00814264, -0.01554361, -0.03209648],
       [ 0.01588533,  0.00597321, -0.02245901, -0.01180513,  0.01279968,
        -0.05058927,  0.02326687, -0.00052942]], dtype=float32)>

In [40]:
layer5.losses # Both keras loss and custom loss are calculated

[<tf.Tensor: shape=(), dtype=float32, numpy=0.006554167>,
 <tf.Tensor: shape=(), dtype=float32, numpy=-0.001207883>]

In [41]:
# Making the custom layer serializable by updating get_config method
class SimpleLinear(layers.Layer):
  """Serializable linear layer: lazy weights plus a ``get_config`` override.

  The forward pass computes ``tf.matmul(input_tensor, w) + b``. ``get_config``
  adds ``units`` to the base config so the layer can be re-created with
  ``SimpleLinear.from_config(config)``.

  Args:
    units: Number of columns of ``w`` and length of ``b``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, units=8, **kwargs):
    # Zero-argument super() does not look the class up by name, so it keeps
    # working when this notebook redefines ``SimpleLinear`` in other cells.
    super().__init__(**kwargs)
    self.units = units

  def build(self, input_shape):
    """Create ``w`` (random normal) and ``b`` (ones) for the given input shape."""
    self.w = self.add_weight(shape=(input_shape[-1], self.units),
                             initializer='random_normal',
                             trainable=True)

    # The trailing comma matters: ``(self.units)`` is just an int, whereas
    # ``(self.units,)`` is a real 1-element shape tuple.
    self.b = self.add_weight(shape=(self.units,),
                             initializer='ones',
                             trainable=True)

  def call(self, input_tensor):
    """Return ``input_tensor @ w + b``."""
    return tf.matmul(input_tensor, self.w) + self.b

  def get_config(self):
    """Return the base layer config extended with this layer's ``units``."""
    config = super().get_config()
    config.update({'units': self.units})

    return config

In [42]:
layer6 = SimpleLinear(units=3)
y = layer6(x)

In [44]:
# The config holds the base Layer fields plus the 'units' entry added by
# SimpleLinear.get_config (see the dict displayed below).
config = layer6.get_config()
config

{'dtype': 'float32', 'name': 'simple_linear_9', 'trainable': True, 'units': 3}

In [45]:
# Re-create a layer from the config dict alone. The dict contains only
# constructor settings (name, dtype, trainable, units), no weight values.
new_layer6 = SimpleLinear.from_config(config)
new_layer6

<__main__.SimpleLinear at 0x7f9e407a9650>

In [46]:
new_layer6.get_config()

{'dtype': 'float32', 'name': 'simple_linear_9', 'trainable': True, 'units': 3}

In [47]:
# Layers with different behaviour in training and prediction phase
class CustomDropout(layers.Layer):
  """Dropout layer that is only active when ``training`` is truthy.

  Args:
    rate: Dropout rate forwarded to ``tf.nn.dropout``.
    **kwargs: Forwarded unchanged to ``layers.Layer.__init__``.
  """

  def __init__(self, rate, **kwargs):
    super().__init__(**kwargs)
    self.rate = rate

  def call(self, inputs, training=None):
    """Apply dropout in training mode; otherwise return ``inputs`` unchanged."""
    # training=None (the default) is falsy, so it is treated like inference.
    if training:
      return tf.nn.dropout(inputs, rate=self.rate)

    return inputs

  def get_config(self):
    """Return the base layer config extended with ``rate``.

    ``rate`` is a required constructor argument, so it has to be part of the
    config for ``CustomDropout.from_config(config)`` to rebuild the layer.
    """
    config = super().get_config()
    config.update({'rate': self.rate})
    return config

# Custom Models

In [48]:
from tensorflow.keras import Model

In [49]:
class CustomRegressionModel(Model):
  """Fully connected regression network built by subclassing ``Model``.

  One ``Dense`` layer is created per entry of ``layer_units`` (the first is
  stored as ``input_layer``, the rest in ``hidden_layers``), followed by a
  single-unit ``Dense`` output layer with no activation.

  Args:
    input_shape: Number of input features; forwarded to the first Dense
      layer as ``input_shape=[input_shape]``.
    layer_units: Non-empty sequence of layer widths. Defaults to ``(8, 4)``;
      an immutable tuple is used so the default cannot be mutated between
      instantiations.
    activation: Activation used by the input and hidden layers.
    initializer: Kernel initializer used by the input and hidden layers.

  Raises:
    AssertionError: If ``layer_units`` is empty.
  """

  def __init__(self, input_shape, layer_units=(8, 4),
               activation='relu', initializer='random_normal'):
    super().__init__()

    assert len(layer_units) > 0, 'layer_units must contain at least one entry'

    self.input_layer = layers.Dense(layer_units[0],
                                    activation=activation,
                                    kernel_initializer=initializer,
                                    input_shape=[input_shape])

    # Every width after the first becomes a hidden layer, in order.
    self.hidden_layers = [
        layers.Dense(units,
                     activation=activation,
                     kernel_initializer=initializer)
        for units in layer_units[1:]
    ]

    # Single linear output unit for regression.
    self.output_layer = layers.Dense(1)

  def call(self, input_tensor):
    """Run the input layer, each hidden layer in order, then the output layer."""
    x = self.input_layer(input_tensor)
    for layer in self.hidden_layers:
      x = layer(x)

    return self.output_layer(x)


In [50]:
x = random_uniform_initializer((4, 8))
x

<tf.Tensor: shape=(4, 8), dtype=float32, numpy=
array([[ 0.01360508, -0.0391717 ,  0.03256488,  0.02987942,  0.00690515,
         0.0319689 ,  0.04661164, -0.03448243],
       [-0.04137808, -0.02963722,  0.02893592,  0.03774916,  0.02849284,
        -0.04949467,  0.04233085, -0.04126859],
       [-0.01628311, -0.01031269, -0.0166592 ,  0.00364785,  0.02429113,
         0.01654692,  0.03975267, -0.04636636],
       [ 0.01360584, -0.04213147,  0.02433132,  0.02548866,  0.01684747,
        -0.02154278, -0.01459755,  0.03087118]], dtype=float32)>

In [51]:
# Positional arguments map to input_shape=8 (last axis of x),
# layer_units=[8, 16, 32] and activation='sigmoid'.
# .layers lists the three sized Dense layers plus the single-unit output layer.
custom_reg_model = CustomRegressionModel(x.shape[-1], [8, 16, 32], 'sigmoid')
custom_reg_model.layers

[<keras.layers.core.dense.Dense at 0x7f9e44dec590>,
 <keras.layers.core.dense.Dense at 0x7f9e3ffa3590>,
 <keras.layers.core.dense.Dense at 0x7f9e3ffa3d90>,
 <keras.layers.core.dense.Dense at 0x7f9e401c2510>]

In [52]:
# Custom Model summary cannot be viewed before running at least one forward pass
# Param counts in the summary below are kernel + bias per Dense layer:
# 8*8+8 = 72, 8*16+16 = 144, 16*32+32 = 544, 32*1+1 = 33.
custom_reg_model(x)
custom_reg_model.summary()

Model: "custom_regression_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_1 (Dense)             multiple                  72        
                                                                 
 dense_2 (Dense)             multiple                  144       
                                                                 
 dense_3 (Dense)             multiple                  544       
                                                                 
 dense_4 (Dense)             multiple                  33        
                                                                 
Total params: 793
Trainable params: 793
Non-trainable params: 0
_________________________________________________________________
