<a href="https://colab.research.google.com/github/Dmitri9149/TensorFlow-PyTorch-basics/blob/master/TensorFlow_Blocks_and_Layesr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

The code is based on the d2l.ai book http://d2l.ai/

In [2]:
### the code is based on d2l.ai book 

In [3]:
### sequence of models , Dense with relu is the first in the chain
net = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256,activation=tf.nn.relu),
  tf.keras.layers.Dense(10)
])

X=tf.random.uniform((2,20))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.00909282,  0.01374975, -0.06137768,  0.2827135 ,  0.16433828,
         0.017675  ,  0.02951406, -0.00791816, -0.01246743,  0.45645836],
       [ 0.20786038,  0.13381454, -0.04556976,  0.1745302 ,  0.10326056,
         0.03028696,  0.09641308, -0.15879628, -0.13908726,  0.12435369]],
      dtype=float32)>

In [4]:
net.call(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.00909282,  0.01374975, -0.06137768,  0.2827135 ,  0.16433828,
         0.017675  ,  0.02951406, -0.00791816, -0.01246743,  0.45645836],
       [ 0.20786038,  0.13381454, -0.04556976,  0.1745302 ,  0.10326056,
         0.03028696,  0.09641308, -0.15879628, -0.13908726,  0.12435369]],
      dtype=float32)>

In [5]:
### Custom Block

In [6]:
class MLP(tf.keras.Model):
  def __init__(self):
    super().__init__()

    self.hidden=tf.keras.layers.Dense(units=256,activation=tf.nn.relu)
    self.out = tf.keras.layers.Dense(units=10)

  def call(self, X):
    return self.out(self.hidden((X)))

In [7]:
net=MLP()
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.10386161,  0.14772847,  0.07590111,  0.00041206, -0.22431685,
         0.3106515 ,  0.34962907,  0.09930101, -0.28804976,  0.3125234 ],
       [ 0.029401  , -0.04084022,  0.27908334,  0.00829339, -0.29578227,
         0.06364293,  0.14512315,  0.06512737, -0.26229063,  0.2610535 ]],
      dtype=float32)>

In [8]:
class MySequential(tf.keras.Model):
  def __init__(self, *args):
    super().__init__()
    self.modules = []
    for block in args:
      self.modules.append(block)

  def call(self,X):
    for module in self.modules:
      X=module(X)
    return(X)


In [9]:
net = MySequential(
    tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.16804029, -0.2983359 , -0.3905234 , -0.04714135, -0.06045905,
        -0.12263326,  0.06957128, -0.0960879 ,  0.15386027, -0.21005443],
       [ 0.10792722, -0.22052994, -0.14740622, -0.09415362, -0.1607841 ,
        -0.24880883,  0.20358746, -0.04025194,  0.11634333, -0.02602224]],
      dtype=float32)>

Below the Module is constructed where several models are applied in parallel on the same input data and the results are concatenated. It is intended to use the code in multihead attention in Transformers.

In [10]:
#initialize the list of n models which is constructed from the same model
#
def list_of_models(model, n):
  return [model]*n


In [11]:
list_models = [tf.keras.layers.Dense(10)]*3
arg_list = list_of_models(tf.keras.layers.Dense(10),3)

In [12]:
### use *args to supply a list of models of variable length 
class MyParallel(tf.keras.Model):
  def __init__(self, *args):
    super().__init__()
    self.modules = []
    for block in args:
      self.modules.append(block)

  def call(self,X):
    list_res= []
    for module in self.modules:
      list_res.append(module(X))
#      print(len(list_res))
#    print(list_res)
    concat_final= tf.concat(list_res, -1)
    return(concat_final)

In [13]:
net = MyParallel(tf.keras.layers.Dense(10), tf.keras.layers.Dense(10), tf.keras.layers.Dense(10))
X = tf.random.uniform((2,10))
net(X)

<tf.Tensor: shape=(2, 30), dtype=float32, numpy=
array([[-0.56124824, -0.38891086,  0.35382545, -0.9906275 , -0.5402892 ,
         0.49451846,  0.19466962,  0.21224272, -1.0865588 ,  0.7103232 ,
         0.02705622, -0.08997977, -0.37342167,  0.625508  ,  0.07295603,
        -0.65145755, -0.08856682, -0.3422256 , -1.2647176 , -0.5413022 ,
        -0.10384612, -0.4562428 , -0.6272959 , -0.34211698, -0.2750854 ,
         0.94105494,  0.18589485, -0.07349542,  0.22988643,  0.96972764],
       [-0.36661115, -0.4719828 ,  0.06597181, -0.8355848 , -0.58647263,
         0.64303195,  0.03227372,  0.4768671 , -0.8007686 ,  0.35421258,
         0.06726015,  0.07977167,  0.10896739,  0.52407306,  0.5189501 ,
        -1.0372953 , -0.18133628, -0.20285854, -1.3682258 , -0.62451136,
        -0.01978788, -0.4696629 , -0.22328262, -0.13282071, -0.9338706 ,
         0.9689894 ,  0.18261452,  0.22877276,  0.68559885,  0.30046397]],
      dtype=float32)>

In [14]:
### to use the list_models (which is a list) we have to unpack it
net = MyParallel(*list_models)
X = tf.random.uniform((2,10))
net(X)

<tf.Tensor: shape=(2, 30), dtype=float32, numpy=
array([[ 1.1712143 ,  0.35506618,  0.27431348, -0.48405725,  0.51399314,
        -0.38894856, -1.0032334 ,  1.0930847 ,  0.6921097 ,  1.5358344 ,
         1.1712143 ,  0.35506618,  0.27431348, -0.48405725,  0.51399314,
        -0.38894856, -1.0032334 ,  1.0930847 ,  0.6921097 ,  1.5358344 ,
         1.1712143 ,  0.35506618,  0.27431348, -0.48405725,  0.51399314,
        -0.38894856, -1.0032334 ,  1.0930847 ,  0.6921097 ,  1.5358344 ],
       [ 1.0266249 ,  0.04710215,  0.2816203 , -0.07991627,  0.2037133 ,
        -0.5349076 , -0.9093189 ,  1.0602528 ,  0.6188585 ,  0.53985655,
         1.0266249 ,  0.04710215,  0.2816203 , -0.07991627,  0.2037133 ,
        -0.5349076 , -0.9093189 ,  1.0602528 ,  0.6188585 ,  0.53985655,
         1.0266249 ,  0.04710215,  0.2816203 , -0.07991627,  0.2037133 ,
        -0.5349076 , -0.9093189 ,  1.0602528 ,  0.6188585 ,  0.53985655]],
      dtype=float32)>

In [15]:
net = MyParallel(*arg_list)
X = tf.random.uniform((2,10))
net(X)

<tf.Tensor: shape=(2, 30), dtype=float32, numpy=
array([[-1.0047772 ,  0.34146777, -0.59281015,  0.29828238,  0.03776546,
        -0.18795398,  0.14842464,  0.4939804 , -0.4818945 , -0.46737397,
        -1.0047772 ,  0.34146777, -0.59281015,  0.29828238,  0.03776546,
        -0.18795398,  0.14842464,  0.4939804 , -0.4818945 , -0.46737397,
        -1.0047772 ,  0.34146777, -0.59281015,  0.29828238,  0.03776546,
        -0.18795398,  0.14842464,  0.4939804 , -0.4818945 , -0.46737397],
       [-0.59279555,  0.03367037, -1.0668515 ,  0.13989648,  0.10434949,
         0.34627205,  0.4655439 ,  0.34844258,  0.19958179, -0.2087093 ,
        -0.59279555,  0.03367037, -1.0668515 ,  0.13989648,  0.10434949,
         0.34627205,  0.4655439 ,  0.34844258,  0.19958179, -0.2087093 ,
        -0.59279555,  0.03367037, -1.0668515 ,  0.13989648,  0.10434949,
         0.34627205,  0.4655439 ,  0.34844258,  0.19958179, -0.2087093 ]],
      dtype=float32)>

In [16]:
### Nested Models 
class NestedBlocks(tf.keras.Model):
  def __init__(self):
    super().__init__()
    self.net=tf.keras.Sequential()
    self.net.add(tf.keras.layers.Dense(20,activation = tf.nn.relu))
    self.net.add(tf.keras.layers.Dense(20,activation = tf.nn.relu))
    self.dense = tf.keras.layers.Dense(20,activation = tf.nn.relu)

  def call(self, inputs):
    return self.dense(self.net(inputs))



In [17]:
net = NestedBlocks()
X = tf.random.uniform((2, 20))
net(X)

<tf.Tensor: shape=(2, 20), dtype=float32, numpy=
array([[0.        , 0.        , 0.04393809, 0.        , 0.00760015,
        0.11003554, 0.        , 0.04990879, 0.10807315, 0.        ,
        0.        , 0.        , 0.04251675, 0.        , 0.        ,
        0.        , 0.2316497 , 0.22356923, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.22502428,
        0.        , 0.        , 0.31385317, 0.18847409, 0.18041375,
        0.        , 0.04677206, 0.        , 0.        , 0.        ,
        0.        , 0.38573745, 0.05432643, 0.        , 0.        ]],
      dtype=float32)>

Parameter access

In [18]:
import numpy as np

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1)
])

X = tf.random.uniform((2, 4))
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[ 0.        ],
       [-0.00472663]], dtype=float32)>

In [19]:
print(net.layers[2].weights)

[<tf.Variable 'dense_15/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[-0.35243016],
       [-0.13560206],
       [-0.02524698],
       [ 0.673424  ]], dtype=float32)>, <tf.Variable 'dense_15/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]


In [20]:
print(type(net.layers[2].weights[1]))
print(net.layers[2].weights[1])
print(tf.convert_to_tensor(net.layers[2].weights[1]))

<class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
<tf.Variable 'dense_15/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>
tf.Tensor([0.], shape=(1,), dtype=float32)


In [21]:
print(net.layers[1].weights)
print(net.get_weights())

[<tf.Variable 'dense_14/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-0.5817772 , -0.44365552, -0.6431604 , -0.7945558 ],
       [-0.3782673 ,  0.12490118, -0.3444708 ,  0.10040689],
       [ 0.34444815,  0.10886186,  0.29199153, -0.75480783],
       [ 0.00362861, -0.8353254 ,  0.80649513, -0.54661834]],
      dtype=float32)>, <tf.Variable 'dense_14/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]
[array([[-0.5817772 , -0.44365552, -0.6431604 , -0.7945558 ],
       [-0.3782673 ,  0.12490118, -0.3444708 ,  0.10040689],
       [ 0.34444815,  0.10886186,  0.29199153, -0.75480783],
       [ 0.00362861, -0.8353254 ,  0.80649513, -0.54661834]],
      dtype=float32), array([0., 0., 0., 0.], dtype=float32), array([[-0.35243016],
       [-0.13560206],
       [-0.02524698],
       [ 0.673424  ]], dtype=float32), array([0.], dtype=float32)]


In [22]:
net.get_weights()[1]

array([0., 0., 0., 0.], dtype=float32)

In [23]:
### collecting parameters from nested blocks
def block1(name):
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4, activation=tf.nn.relu)],
        name=name)

def block2():
    net = tf.keras.Sequential()
    for i in range(4):
        # Nested here
        net.add(block1(name=f'block-{i}'))
    return net

rgnet = tf.keras.Sequential()
rgnet.add(block2())
rgnet.add(tf.keras.layers.Dense(1))
rgnet(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.00672365],
       [-0.02152235]], dtype=float32)>

In [24]:
print(rgnet.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_4 (Sequential)    (2, 4)                    80        
_________________________________________________________________
dense_20 (Dense)             (2, 1)                    5         
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________
None


In [25]:
rgnet.layers[0].layers[1].layers[1].weights[1]

<tf.Variable 'sequential_4/block-1/dense_17/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>

In [26]:
net_prl = MyParallel(tf.keras.layers.Dense(4), tf.keras.layers.Dense(4), tf.keras.layers.Dense(4))
X = tf.random.uniform((1,4))
net_prl(X)

<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
array([[ 0.81099457,  0.19220386,  0.16658069, -0.07737514,  0.28590167,
         0.5230656 , -0.4353081 , -0.03039999, -0.09708518, -0.36586413,
        -0.3536003 ,  0.10420385]], dtype=float32)>

In [27]:
print(net_prl.summary())

Model: "my_parallel_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             multiple                  20        
_________________________________________________________________
dense_22 (Dense)             multiple                  20        
_________________________________________________________________
dense_23 (Dense)             multiple                  20        
Total params: 60
Trainable params: 60
Non-trainable params: 0
_________________________________________________________________
None


In [28]:
net = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(
      4, activation = tf.nn.relu, 
      kernel_initializer = tf.random_normal_initializer(mean=0, stddev=0.01),
      bias_initializer=tf.zeros_initializer()
  ),
  tf.keras.layers.Dense(1)                                  
])
net(X)
net.weights

[<tf.Variable 'dense_24/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[ 0.02035885,  0.01135421, -0.00145491,  0.00264139],
        [-0.00676935,  0.00625156,  0.00262907, -0.0159021 ],
        [-0.00916125,  0.01898468, -0.00277208, -0.01571836],
        [-0.00306712,  0.00142435,  0.00065452, -0.00520501]],
       dtype=float32)>,
 <tf.Variable 'dense_24/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>,
 <tf.Variable 'dense_25/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[ 0.10517943],
        [ 1.0334432 ],
        [-1.0305837 ],
        [-0.74189115]], dtype=float32)>,
 <tf.Variable 'dense_25/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

In [29]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.Constant(1),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_26/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'dense_26/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [30]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.GlorotUniform()),
    tf.keras.layers.Dense(
        1, kernel_initializer=tf.keras.initializers.Constant(1)),
])

net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])

<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[ 0.4765696 , -0.69300777, -0.16482627, -0.4676735 ],
       [ 0.23094362,  0.65221995, -0.635887  ,  0.04500878],
       [-0.21136963,  0.49000472, -0.08565372,  0.15789896],
       [ 0.46441942, -0.5388297 ,  0.00678027,  0.3247164 ]],
      dtype=float32)>
<tf.Variable 'dense_29/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[1.],
       [1.],
       [1.],
       [1.]], dtype=float32)>


In [31]:
class MyInit(tf.keras.initializers.Initializer):
  def __init__(self,shape, dtype=None):
    data = tf.random.uniform(shape, -10,10,dtype=dtype)
    factor=(tf.abs.data >=5)
    factor=tf.cast(factor, tf.float32)
    return factor*data

net(X)
net.layers[1].weights

[<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[ 0.4765696 , -0.69300777, -0.16482627, -0.4676735 ],
        [ 0.23094362,  0.65221995, -0.635887  ,  0.04500878],
        [-0.21136963,  0.49000472, -0.08565372,  0.15789896],
        [ 0.46441942, -0.5388297 ,  0.00678027,  0.3247164 ]],
       dtype=float32)>,
 <tf.Variable 'dense_28/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

In [32]:
### assign parameters
net.layers[1].weights[0][:].assign(net.layers[1].weights[0] + 1)
net.layers[1].weights[0][0, 0].assign(42)
net.layers[1].weights[0]

<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[42.        ,  0.30699223,  0.8351737 ,  0.53232646],
       [ 1.2309437 ,  1.65222   ,  0.36411297,  1.0450088 ],
       [ 0.78863037,  1.4900048 ,  0.9143463 ,  1.1578989 ],
       [ 1.4644194 ,  0.46117032,  1.0067803 ,  1.3247163 ]],
      dtype=float32)>

In [33]:
# tf.keras behaves a bit differently. It removes the duplicate layer
# automatically
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1),
])

net(X)
# Check whether the parameters are different
print(len(net.layers) == 3)

True


In [39]:
### Deffered Initialization
net = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256, activation=tf.nn.relu),
  tf.keras.layers.Dense(10)                                  
])

In [41]:
### layers are empty
[net.layers[i].get_weights() for i in range(len(net.layers))]

[[], []]

In [44]:
### this code gives error because net the weights are not initilized yet
# net.get_weights()

In [45]:
X = tf.random.uniform((2,20))
net(X)
[w.shape for w in net.get_weights()]

[(20, 256), (256,), (256, 10), (10,)]