<a href="https://colab.research.google.com/github/Dmitri9149/TensorFlow-PyTorch-basics/blob/master/TensorFlow_Blocks_and_Layesr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf

The code is based on the d2l.ai book http://d2l.ai/

In [2]:
### the code is based on d2l.ai book 

In [3]:
### sequence of models , Dense with relu is the first in the chain
net = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256,activation=tf.nn.relu),
  tf.keras.layers.Dense(10)
])

X=tf.random.uniform((2,20))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 1.7263012e-01,  2.1431643e-01,  6.1796736e-02, -4.4129044e-01,
        -4.8060626e-01, -4.0500471e-01, -1.8431991e-04, -1.0192727e-01,
        -9.7754672e-02,  5.1312929e-01],
       [ 9.0565480e-02,  1.5937659e-01,  1.3637145e-01, -3.5661849e-01,
        -3.2038417e-01, -3.0459233e-02,  1.4607387e-02, -2.3882218e-02,
        -1.3652989e-01,  2.7688783e-01]], dtype=float32)>

In [4]:
net.call(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 1.7263012e-01,  2.1431643e-01,  6.1796736e-02, -4.4129044e-01,
        -4.8060626e-01, -4.0500471e-01, -1.8431991e-04, -1.0192727e-01,
        -9.7754672e-02,  5.1312929e-01],
       [ 9.0565480e-02,  1.5937659e-01,  1.3637145e-01, -3.5661849e-01,
        -3.2038417e-01, -3.0459233e-02,  1.4607387e-02, -2.3882218e-02,
        -1.3652989e-01,  2.7688783e-01]], dtype=float32)>

In [5]:
### Custom Block

In [6]:
class MLP(tf.keras.Model):
  def __init__(self):
    super().__init__()

    self.hidden=tf.keras.layers.Dense(units=256,activation=tf.nn.relu)
    self.out = tf.keras.layers.Dense(units=10)

  def call(self, X):
    return self.out(self.hidden((X)))

In [7]:
net=MLP()
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[-0.12769291, -0.25260958,  0.08343402,  0.20470409, -0.31387323,
         0.15593192, -0.3788177 ,  0.20900992, -0.10961298,  0.0514426 ],
       [ 0.06288157, -0.3623747 ,  0.18442152, -0.06001   , -0.3553054 ,
         0.08356073, -0.18182823,  0.04251212,  0.12329391,  0.09134473]],
      dtype=float32)>

In [8]:
class MySequential(tf.keras.Model):
  def __init__(self, *args):
    super().__init__()
    self.modules = []
    for block in args:
      self.modules.append(block)

  def call(self,X):
    for module in self.modules:
      X=module(X)
    return(X)


In [9]:
net = MySequential(
    tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
    tf.keras.layers.Dense(10))
net(X)

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[ 0.08934961,  0.4987669 ,  0.21602273,  0.28155765, -0.14894572,
        -0.41399235,  0.1760551 , -0.28952006, -0.17446782,  0.19280595],
       [ 0.05327718,  0.46403652,  0.23126954,  0.17242783, -0.18567531,
        -0.31692493,  0.41848895, -0.24408944, -0.04909487, -0.00834145]],
      dtype=float32)>

Below the Module is constructed where several models are applied in parallel on the same input data and the results are concatenated. It is intended to use the code in multihead attention in Transformers.

In [10]:
#initialize the list of n models which is constructed from the same model
#
def list_of_models(model, n):
  return [model]*n


In [11]:
list_models = [tf.keras.layers.Dense(10)]*3
arg_list = list_of_models(tf.keras.layers.Dense(10),3)

In [12]:
### use *args to supply a list of models of variable length 
class MyParallel(tf.keras.Model):
  def __init__(self, *args):
    super().__init__()
    self.modules = []
    for block in args:
      self.modules.append(block)

  def call(self,X):
    list_res= []
    for module in self.modules:
      list_res.append(module(X))
#      print(len(list_res))
#    print(list_res)
    concat_final= tf.concat(list_res, -1)
    return(concat_final)

In [13]:
net = MyParallel(tf.keras.layers.Dense(10), tf.keras.layers.Dense(10), tf.keras.layers.Dense(10))
X = tf.random.uniform((2,10))
net(X)

<tf.Tensor: shape=(2, 30), dtype=float32, numpy=
array([[-0.6265111 ,  0.03493553,  0.5463366 ,  0.09788583,  0.98766917,
         0.86607236, -1.0141735 ,  0.38054252, -1.1048366 ,  0.48305997,
        -0.32125568,  0.83885604, -0.4091549 ,  0.3872388 ,  0.13924542,
        -0.24876627,  0.80291384,  0.42212495, -0.14296646, -0.3387097 ,
         0.03151526,  0.7264527 ,  0.10984216, -0.9317466 , -0.4808729 ,
         0.951396  ,  0.04512585,  0.7286407 , -0.5385448 , -0.8669783 ],
       [ 0.6564251 , -0.19665673,  0.064319  , -0.33675522,  0.4889831 ,
         0.8243855 , -0.81144124,  0.19182771, -1.2137411 ,  0.33441728,
        -0.3233828 ,  0.21828537, -0.82481694,  0.2218912 , -0.22842976,
        -0.46956187,  0.46885934,  0.9828911 , -0.36073273, -0.05411854,
         0.3336642 ,  1.6485653 , -0.3277831 , -0.5855143 ,  0.07096586,
         1.0356281 , -0.20889251,  0.71922135, -0.58901817, -0.5973939 ]],
      dtype=float32)>

In [14]:
### to use the list_models (which is a list) we have to unpack it
net = MyParallel(*list_models)
X = tf.random.uniform((2,10))
net(X)

<tf.Tensor: shape=(2, 30), dtype=float32, numpy=
array([[ 0.43916833, -0.4916648 ,  0.12814364,  0.0731592 , -0.0019552 ,
         0.28563952,  0.43955463, -0.82320714, -0.506758  , -1.2878426 ,
         0.43916833, -0.4916648 ,  0.12814364,  0.0731592 , -0.0019552 ,
         0.28563952,  0.43955463, -0.82320714, -0.506758  , -1.2878426 ,
         0.43916833, -0.4916648 ,  0.12814364,  0.0731592 , -0.0019552 ,
         0.28563952,  0.43955463, -0.82320714, -0.506758  , -1.2878426 ],
       [ 0.5116131 , -0.18132922, -0.23721924,  0.24092817,  0.2930154 ,
         0.27862704,  0.8339441 , -1.1740276 , -0.36932296, -1.548611  ,
         0.5116131 , -0.18132922, -0.23721924,  0.24092817,  0.2930154 ,
         0.27862704,  0.8339441 , -1.1740276 , -0.36932296, -1.548611  ,
         0.5116131 , -0.18132922, -0.23721924,  0.24092817,  0.2930154 ,
         0.27862704,  0.8339441 , -1.1740276 , -0.36932296, -1.548611  ]],
      dtype=float32)>

In [15]:
net = MyParallel(*arg_list)
X = tf.random.uniform((2,10))
net(X)

<tf.Tensor: shape=(2, 30), dtype=float32, numpy=
array([[-0.22108719,  1.0575656 ,  0.4960766 , -0.89573157, -0.6078662 ,
        -0.11182718, -0.21006355,  0.0503401 ,  0.8280648 ,  2.0990157 ,
        -0.22108719,  1.0575656 ,  0.4960766 , -0.89573157, -0.6078662 ,
        -0.11182718, -0.21006355,  0.0503401 ,  0.8280648 ,  2.0990157 ,
        -0.22108719,  1.0575656 ,  0.4960766 , -0.89573157, -0.6078662 ,
        -0.11182718, -0.21006355,  0.0503401 ,  0.8280648 ,  2.0990157 ],
       [ 0.13306147,  0.9277452 ,  0.12003216, -0.1998664 ,  0.22129753,
         0.39661455, -0.49635756,  0.34123522,  0.572397  ,  1.8829432 ,
         0.13306147,  0.9277452 ,  0.12003216, -0.1998664 ,  0.22129753,
         0.39661455, -0.49635756,  0.34123522,  0.572397  ,  1.8829432 ,
         0.13306147,  0.9277452 ,  0.12003216, -0.1998664 ,  0.22129753,
         0.39661455, -0.49635756,  0.34123522,  0.572397  ,  1.8829432 ]],
      dtype=float32)>

In [16]:
### Nested Models 
class NestedBlocks(tf.keras.Model):
  def __init__(self):
    super().__init__()
    self.net=tf.keras.Sequential()
    self.net.add(tf.keras.layers.Dense(20,activation = tf.nn.relu))
    self.net.add(tf.keras.layers.Dense(20,activation = tf.nn.relu))
    self.dense = tf.keras.layers.Dense(20,activation = tf.nn.relu)

  def call(self, inputs):
    return self.dense(self.net(inputs))



In [17]:
net = NestedBlocks()
X = tf.random.uniform((2, 20))
net(X)

<tf.Tensor: shape=(2, 20), dtype=float32, numpy=
array([[0.        , 0.        , 0.1916143 , 0.5057243 , 0.21551774,
        0.        , 0.        , 0.        , 0.17900959, 0.28334907,
        0.        , 0.26462373, 0.        , 0.40603   , 0.        ,
        0.34378237, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.08558773, 0.40184224, 0.14577362,
        0.        , 0.        , 0.        , 0.13933332, 0.15979218,
        0.        , 0.16398239, 0.        , 0.3155504 , 0.        ,
        0.2669871 , 0.        , 0.        , 0.        , 0.        ]],
      dtype=float32)>

Parameter access

In [18]:
import numpy as np

net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4, activation=tf.nn.relu),
    tf.keras.layers.Dense(1)
])

X = tf.random.uniform((2, 4))
net(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.3677348 ],
       [0.11355917]], dtype=float32)>

In [19]:
print(net.layers[2].weights)

[<tf.Variable 'dense_15/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[ 0.27844834],
       [ 0.3181486 ],
       [ 0.9125359 ],
       [-0.9706087 ]], dtype=float32)>, <tf.Variable 'dense_15/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]


In [20]:
print(type(net.layers[2].weights[1]))
print(net.layers[2].weights[1])
print(tf.convert_to_tensor(net.layers[2].weights[1]))

<class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>
<tf.Variable 'dense_15/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>
tf.Tensor([0.], shape=(1,), dtype=float32)


In [21]:
print(net.layers[1].weights)
print(net.get_weights())

[<tf.Variable 'dense_14/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-0.08881402, -0.32916284,  0.51527685, -0.7802759 ],
       [ 0.59787554, -0.35355616,  0.30131763, -0.19972724],
       [ 0.5504481 ,  0.824084  , -0.33000797,  0.7529492 ],
       [-0.7628474 ,  0.31068665, -0.30919534, -0.01447111]],
      dtype=float32)>, <tf.Variable 'dense_14/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]
[array([[-0.08881402, -0.32916284,  0.51527685, -0.7802759 ],
       [ 0.59787554, -0.35355616,  0.30131763, -0.19972724],
       [ 0.5504481 ,  0.824084  , -0.33000797,  0.7529492 ],
       [-0.7628474 ,  0.31068665, -0.30919534, -0.01447111]],
      dtype=float32), array([0., 0., 0., 0.], dtype=float32), array([[ 0.27844834],
       [ 0.3181486 ],
       [ 0.9125359 ],
       [-0.9706087 ]], dtype=float32), array([0.], dtype=float32)]


In [22]:
net.get_weights()[1]

array([0., 0., 0., 0.], dtype=float32)

In [23]:
### collecting parameters from nested blocks
def block1(name):
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(4, activation=tf.nn.relu)],
        name=name)

def block2():
    net = tf.keras.Sequential()
    for i in range(4):
        # Nested here
        net.add(block1(name=f'block-{i}'))
    return net

rgnet = tf.keras.Sequential()
rgnet.add(block2())
rgnet.add(tf.keras.layers.Dense(1))
rgnet(X)

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[-0.01382869],
       [-0.01077355]], dtype=float32)>

In [24]:
print(rgnet.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential_4 (Sequential)    (2, 4)                    80        
_________________________________________________________________
dense_20 (Dense)             (2, 1)                    5         
Total params: 85
Trainable params: 85
Non-trainable params: 0
_________________________________________________________________
None


In [25]:
rgnet.layers[0].layers[1].layers[1].weights[1]

<tf.Variable 'sequential_4/block-1/dense_17/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>

In [26]:
net_prl = MyParallel(tf.keras.layers.Dense(4), tf.keras.layers.Dense(4), tf.keras.layers.Dense(4))
X = tf.random.uniform((1,4))
net_prl(X)

<tf.Tensor: shape=(1, 12), dtype=float32, numpy=
array([[ 0.73431635, -0.06339328,  0.17965657, -0.49208155,  0.46116024,
        -0.02776731,  0.7598322 ,  0.6055757 ,  0.4468705 ,  1.3513756 ,
         0.638832  , -0.7517152 ]], dtype=float32)>

In [27]:
print(net_prl.summary())

Model: "my_parallel_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_21 (Dense)             multiple                  20        
_________________________________________________________________
dense_22 (Dense)             multiple                  20        
_________________________________________________________________
dense_23 (Dense)             multiple                  20        
Total params: 60
Trainable params: 60
Non-trainable params: 0
_________________________________________________________________
None


In [28]:
net = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(
      4, activation = tf.nn.relu, 
      kernel_initializer = tf.random_normal_initializer(mean=0, stddev=0.01),
      bias_initializer=tf.zeros_initializer()
  ),
  tf.keras.layers.Dense(1)                                  
])
net(X)
net.weights

[<tf.Variable 'dense_24/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[ 0.0028787 , -0.01225488,  0.0054406 ,  0.00692397],
        [-0.00957665, -0.02282341,  0.0086571 ,  0.00398314],
        [ 0.00377422,  0.00510709,  0.01372571,  0.00540433],
        [-0.00198938,  0.01209311,  0.00795927, -0.00978171]],
       dtype=float32)>,
 <tf.Variable 'dense_24/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>,
 <tf.Variable 'dense_25/kernel:0' shape=(4, 1) dtype=float32, numpy=
 array([[ 0.26730108],
        [-0.1912573 ],
        [-0.28552192],
        [-0.2977556 ]], dtype=float32)>,
 <tf.Variable 'dense_25/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

In [29]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4, activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.Constant(1),
        bias_initializer=tf.zeros_initializer()),
    tf.keras.layers.Dense(1),
])

net(X)
net.weights[0], net.weights[1]

(<tf.Variable 'dense_26/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32)>,
 <tf.Variable 'dense_26/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>)

In [30]:
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(
        4,
        activation=tf.nn.relu,
        kernel_initializer=tf.keras.initializers.GlorotUniform()),
    tf.keras.layers.Dense(
        1, kernel_initializer=tf.keras.initializers.Constant(1)),
])

net(X)
print(net.layers[1].weights[0])
print(net.layers[2].weights[0])

<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[-0.47673595,  0.06076294, -0.28465432, -0.8403269 ],
       [ 0.5425888 ,  0.79675084, -0.82446146,  0.02563405],
       [-0.6462988 ,  0.8172081 , -0.07403815,  0.15194541],
       [ 0.526377  ,  0.8342189 , -0.6945702 , -0.23790413]],
      dtype=float32)>
<tf.Variable 'dense_29/kernel:0' shape=(4, 1) dtype=float32, numpy=
array([[1.],
       [1.],
       [1.],
       [1.]], dtype=float32)>


In [31]:
class MyInit(tf.keras.initializers.Initializer):
  def __init__(self,shape, dtype=None):
    data = tf.random.uniform(shape, -10,10,dtype=dtype)
    factor=(tf.abs.data >=5)
    factor=tf.cast(factor, tf.float32)
    return factor*data

net(X)
net.layers[1].weights

[<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
 array([[-0.47673595,  0.06076294, -0.28465432, -0.8403269 ],
        [ 0.5425888 ,  0.79675084, -0.82446146,  0.02563405],
        [-0.6462988 ,  0.8172081 , -0.07403815,  0.15194541],
        [ 0.526377  ,  0.8342189 , -0.6945702 , -0.23790413]],
       dtype=float32)>,
 <tf.Variable 'dense_28/bias:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

In [32]:
### assign parameters
net.layers[1].weights[0][:].assign(net.layers[1].weights[0] + 1)
net.layers[1].weights[0][0, 0].assign(42)
net.layers[1].weights[0]

<tf.Variable 'dense_28/kernel:0' shape=(4, 4) dtype=float32, numpy=
array([[42.        ,  1.0607629 ,  0.7153457 ,  0.1596731 ],
       [ 1.5425887 ,  1.7967508 ,  0.17553854,  1.025634  ],
       [ 0.35370117,  1.817208  ,  0.92596185,  1.1519454 ],
       [ 1.526377  ,  1.834219  ,  0.30542982,  0.76209587]],
      dtype=float32)>

In [33]:
# tf.keras behaves a bit differently. It removes the duplicate layer
# automatically
shared = tf.keras.layers.Dense(4, activation=tf.nn.relu)
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    shared,
    shared,
    tf.keras.layers.Dense(1),
])

net(X)
# Check whether the parameters are different
print(len(net.layers) == 3)

True


In [34]:
### Deffered Initialization
net = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256, activation=tf.nn.relu),
  tf.keras.layers.Dense(10)                                  
])

In [35]:
### layers are empty
[net.layers[i].get_weights() for i in range(len(net.layers))]

[[], []]

In [36]:
### this code gives error because net the weights are not initilized yet
# net.get_weights()

In [37]:
X = tf.random.uniform((2,20))
net(X)
[w.shape for w in net.get_weights()]

[(20, 256), (256,), (256, 10), (10,)]

In [38]:
### Custom Layers
class CenteredLayer(tf.keras.Model):
    def __init__(self):
        super().__init__()

    def call(self, inputs):
        return inputs - tf.reduce_mean(inputs)

In [39]:
layer = CenteredLayer()
layer(tf.constant([1,2,3,4]))

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([-1,  0,  1,  2], dtype=int32)>

In [40]:
net = tf.keras.Sequential([tf.keras.layers.Dense(10), CenteredLayer()])

In [41]:
Y = net(tf.random.uniform((4,20)))
tf.reduce_mean(Y)

<tf.Tensor: shape=(), dtype=float32, numpy=7.450581e-09>

In [42]:
### Linear layer from scratch
class MyDense(tf.keras.Model):
    def __init__(self, out_units):
        super().__init__()
        self.out_units = out_units

    def build(self, X_shape):
        self.weight = self.add_weight(name='weight',
            shape=[X_shape[-1], self.out_units],
            initializer=tf.random_normal_initializer())
        self.bias = self.add_weight(
            name='bias', shape=[self.out_units],
            initializer=tf.zeros_initializer())

    def call(self, X):
        linear = tf.matmul(X, self.weight) + self.bias
        return tf.nn.relu(linear)

In [43]:
dense = MyDense(3)
dense(tf.random.uniform((2, 5)))
dense.get_weights()

[array([[-0.04220176, -0.03584058, -0.08537377],
        [ 0.05188091, -0.01124211,  0.002456  ],
        [ 0.09618523, -0.03220281,  0.02665074],
        [-0.01385284,  0.02855311,  0.01566815],
        [-0.03256511, -0.05471463,  0.00390545]], dtype=float32),
 array([0., 0., 0.], dtype=float32)]

In [44]:
dense(tf.random.uniform((2, 5)))

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[0.05333288, 0.        , 0.02426157],
       [0.        , 0.        , 0.        ]], dtype=float32)>

In [45]:
net = tf.keras.models.Sequential([MyDense(8), MyDense(1)])
net(tf.random.uniform((2, 64)))

<tf.Tensor: shape=(2, 1), dtype=float32, numpy=
array([[0.],
       [0.]], dtype=float32)>

In [46]:
### loading and saving tensors
x= tf.range(4)
np.save('x-file.npy',x)

In [47]:
x2 = np.load('x-file.npy', allow_pickle = True)
x2

array([0, 1, 2, 3], dtype=int32)

In [48]:
### save array of files
y = tf.zeros(4)
np.save('x-file.npy',[x,y])

In [49]:
x2, y2 = np.load('x-file.npy', allow_pickle=True)

In [50]:
### dictionary
mydic = {'x':x, 'y':y}
np.save('x-file.npy', mydic)
mydic = np.load('x-file.npy', allow_pickle=True)
mydic

array({'x': <tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 1, 2, 3], dtype=int32)>, 'y': <tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>},
      dtype=object)

In [51]:
class MLP(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        self.hidden = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.out = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        x = self.flatten(inputs)
        x = self.hidden(x)
        return self.out(x)

net = MLP()
X = tf.random.uniform((2, 20))
Y = net(X)


In [52]:
net.save_weights('mlp.params')
clone = MLP()
clone.load_weights('mlp.params')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f4320041f60>

In [53]:
Y_clone = clone(X)
Y_clone == Y

<tf.Tensor: shape=(2, 10), dtype=bool, numpy=
array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True]])>

In [54]:
!nvidia-smi

Fri Dec  4 13:07:16 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.38       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   51C    P0    28W /  70W |    289MiB / 15079MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [55]:
tf.device('/CPU:0'), tf.device('/GPU:0'), tf.device('/GPU:1')

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f4320041b00>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f42d0186c88>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f42d0186b00>)

In [57]:
len(tf.config.experimental.list_physical_devices('GPU'))

1

In [58]:
def try_gpu(i=0):
    """Return gpu(i) if exists, otherwise return cpu()."""
    if len(tf.config.experimental.list_physical_devices('GPU')) >= i + 1:
        return tf.device(f'/GPU:{i}')
    return tf.device('/CPU:0')

def try_all_gpus():
    """Return all available GPUs, or [cpu(),] if no GPU exists."""
    num_gpus = len(tf.config.experimental.list_physical_devices('GPU'))
    devices = [tf.device(f'/GPU:{i}') for i in range(num_gpus)]
    return devices if devices else [tf.device('/CPU:0')]

try_gpu(), try_gpu(10), try_all_gpus()

(<tensorflow.python.eager.context._EagerDeviceContext at 0x7f42c09e5208>,
 <tensorflow.python.eager.context._EagerDeviceContext at 0x7f42c09e52b0>,
 [<tensorflow.python.eager.context._EagerDeviceContext at 0x7f42c09e52e8>])

In [59]:
with try_gpu(1):
    Z = X
print(X)
print(Z)

tf.Tensor(
[[0.6307615  0.34837544 0.74484813 0.47429764 0.5553783  0.65340877
  0.37899852 0.95533526 0.86073875 0.5689478  0.83481157 0.37412727
  0.8626672  0.56218326 0.18310201 0.992628   0.13638139 0.5509567
  0.7868961  0.6950313 ]
 [0.47944427 0.20026004 0.21430802 0.28957903 0.37979293 0.88332486
  0.7723917  0.89428914 0.9870229  0.98848677 0.02067065 0.5172496
  0.05523169 0.9378047  0.46118283 0.57336044 0.79074466 0.04564321
  0.9285114  0.91544926]], shape=(2, 20), dtype=float32)
tf.Tensor(
[[0.6307615  0.34837544 0.74484813 0.47429764 0.5553783  0.65340877
  0.37899852 0.95533526 0.86073875 0.5689478  0.83481157 0.37412727
  0.8626672  0.56218326 0.18310201 0.992628   0.13638139 0.5509567
  0.7868961  0.6950313 ]
 [0.47944427 0.20026004 0.21430802 0.28957903 0.37979293 0.88332486
  0.7723917  0.89428914 0.9870229  0.98848677 0.02067065 0.5172496
  0.05523169 0.9378047  0.46118283 0.57336044 0.79074466 0.04564321
  0.9285114  0.91544926]], shape=(2, 20), dtype=float32)


In [61]:
X + Z

<tf.Tensor: shape=(2, 20), dtype=float32, numpy=
array([[1.261523  , 0.6967509 , 1.4896963 , 0.9485953 , 1.1107566 ,
        1.3068175 , 0.75799704, 1.9106705 , 1.7214775 , 1.1378956 ,
        1.6696231 , 0.74825454, 1.7253344 , 1.1243665 , 0.36620402,
        1.985256  , 0.27276278, 1.1019135 , 1.5737922 , 1.3900626 ],
       [0.95888853, 0.4005201 , 0.42861605, 0.57915807, 0.75958586,
        1.7666497 , 1.5447834 , 1.7885783 , 1.9740458 , 1.9769735 ,
        0.0413413 , 1.0344992 , 0.11046338, 1.8756094 , 0.92236567,
        1.1467209 , 1.5814893 , 0.09128642, 1.8570228 , 1.8308985 ]],
      dtype=float32)>