In [28]:
import tensorflow as tf
import numpy as np

# Eager execution  
## An example of h = sigmoid(W * x + b)

In [4]:
x = tf.constant([[1., 2., 3.], [4., 5., 6.]])
w = tf.Variable(tf.random.normal([3, 2]))
b = tf.Variable(tf.random.normal([2]))
z = tf.matmul(x, w) + b
y = tf.nn.sigmoid(z)
print(y)

tf.Tensor(
[[0.7753509  0.93878555]
 [0.9532169  0.9968606 ]], shape=(2, 2), dtype=float32)


## An end_to_end eager execution example of linear model
Create dataset

In [5]:
NUM_EXAMPLES = 2000
x = tf.random.normal([NUM_EXAMPLES, 10])
noise = tf.random.normal([NUM_EXAMPLES, 5])
w_true = tf.ones([10, 5])
w_true = w_true * 3
b_true = tf.ones([5])
y = tf.matmul(x, w_true) + b_true + noise
print(f'x sample: {x[0:1]}')
print(f'y sample: {y[0:1]}')

x sample: [[-1.594505   -0.8215249  -1.279077   -0.10726237  1.075075    0.91573244
  -1.0122005   1.3771311  -0.8910995  -1.69264   ]]
y sample: [[-12.0410795 -11.230551  -12.571251  -11.151134  -11.924894 ]]


Build linear model y = x * w + b

In [6]:
class Linear(tf.keras.Model):
  def __init__(self):
    super(Linear, self).__init__()
    self.W = tf.Variable(tf.random.normal([10, 5]), name='weight')
    self.B = tf.Variable(tf.random.normal([5]), name='bias')
  def call(self, inputs):
    return tf.matmul(inputs, self.W) + self.B

Create model instance and choose SGD as the optimizer

In [7]:
model = Linear()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

Training process

In [18]:
steps = 100
for i in range(steps):
  with tf.GradientTape() as tape:
    #loss
    error = model(x) - y
    loss_value = tf.reduce_mean(tf.square(error))
  grads = tape.gradient(loss_value, [model.W, model.B])
  optimizer.apply_gradients(zip(grads, [model.W, model.B]))
  if i % 20 == 0:
    print(f"Loss at step {i}: {loss_value}")
print(f"W = {model.W.numpy()}, B = {model.B.numpy()}")

Loss at step 0: 0.9959805607795715
Loss at step 20: 0.9947214722633362
Loss at step 40: 0.993640124797821
Loss at step 60: 0.9927115440368652
Loss at step 80: 0.9919135570526123
W = [[3.013761  2.972824  2.9576807 2.9959006 2.9828606]
 [2.9921997 2.9522212 2.9901388 2.981263  2.9870505]
 [2.983197  2.9589314 3.0155313 3.0105908 3.0207992]
 [3.002246  2.9900434 2.9911227 3.0125127 2.9714382]
 [2.972854  2.9398937 3.0158775 2.9727569 3.0097048]
 [3.0214002 2.9796052 3.002326  3.0160155 3.0155087]
 [2.9566758 2.9727437 2.9714837 2.9215772 2.9489958]
 [2.9522686 3.001058  2.9785402 2.97918   3.0170207]
 [2.9413743 2.9621522 2.9582999 2.9600418 2.9680848]
 [2.970826  2.986206  2.9471984 2.9669378 2.975441 ]], B = [1.0471543  0.98861533 1.0299664  0.9623249  0.96185046]


save, reload model and prediction

In [19]:
# save
model.save_weights('weights')
del model
# load
model = Linear()
model.load_weights('weights')
# predict
x_new = tf.random.normal([1, 10])
print(x_new)
print(model.predict(x_new))

tf.Tensor(
[[-0.6687912 -1.6843041  0.6103643 -1.7633623  0.8536264 -0.3445467
  -0.8255835  0.8244974  0.6084509  1.0568212]], shape=(1, 10), dtype=float32)
[[-3.0622463 -2.977241  -2.9610436 -3.057906  -2.92659  ]]


# Autograph mode
## tf.function decorator
Put @tf.function on top of the function, TensorFlow 2.x will construct the graph automatically

In [22]:
@tf.function
def sigmoid_layer(x, w):
    return tf.nn.sigmoid(tf.matmul(x, w))

x = tf.random.uniform((2, 5))
w = tf.random.uniform((5, 2))
sigmoid_layer(x, w)


<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[0.63038623, 0.58002436],
       [0.7083639 , 0.666897  ]], dtype=float32)>

tf.function used in multiple functions

In [23]:
def inner_function(x, y, b):
  x = tf.matmul(x, y)
  x = x + b
  return x

@tf.function
def outer_function(x):
  y = tf.constant([[2.0], [3.0]])
  b = tf.constant(4.0)

  return inner_function(x, y, b)

outer_function(tf.constant([[1.0, 2.0]])).numpy()

array([[12.]], dtype=float32)

tf.function used in model

In [24]:
class CustomModel(tf.keras.models.Model):
    @tf.function
    def call(self, input_data):
        if tf.reduce_mean(input_data) > 0:
            return input_data
        else:
            return input_data // 2

model = CustomModel()
model(tf.constant([-2, -4]))

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([-1, -2], dtype=int32)>

# Basic operations in TensorFlow 2.x. 
## Tensor creation
Create a Tensor with python basic data types and numpy array


In [29]:
# int
print(tf.constant(1))
# float
print(tf.constant(1.0))
# double
print(tf.constant(1.0, dtype=tf.float64))
# bool
print(tf.constant(True))
# string
print(tf.constant('tensorflow'))
# python list
print(tf.constant([1., 2., 3.]))
# numpy list
print(tf.constant(np.array([1., 2., 3.])))
print(tf.constant(np.array([1., 2., 3.]), dtype=tf.float32))

tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float64)
tf.Tensor(True, shape=(), dtype=bool)
tf.Tensor(b'tensorflow', shape=(), dtype=string)
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32)
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float64)
tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32)


Create Tensors by tf.constant and tf.convert_to_tensor. They return the same result.

In [30]:
print(tf.constant([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]))
print(tf.convert_to_tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]))

tf.Tensor(
[[0.1 0.2 0.3]
 [0.4 0.5 0.6]], shape=(2, 3), dtype=float32)
tf.Tensor(
[[0.1 0.2 0.3]
 [0.4 0.5 0.6]], shape=(2, 3), dtype=float32)


Create Tensors by tf.random

In [31]:
# tf.random.normal
print(tf.random.normal([2, 2], mean=0, stddev=1))
# tf.random.truncated_normal
print(tf.random.truncated_normal([2,2], mean=0, stddev=1, dtype=tf.double))
# tf.random.uniform
print(tf.random.uniform([2, 2], minval=0, maxval=1))

tf.Tensor(
[[ 0.6625773   1.1615977 ]
 [ 0.93601036 -0.64112324]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 0.80185249 -1.97282165]
 [-0.18084448 -0.60388566]], shape=(2, 2), dtype=float64)
tf.Tensor(
[[0.8133954  0.16859221]
 [0.42237675 0.34134817]], shape=(2, 2), dtype=float32)


Create Tensors by special value function of TensorFlow

In [32]:
# tf.ones
print(tf.ones(shape=(1,3),dtype=tf.float32))
# tf.zeors
print(tf.zeros(shape=(1,3),dtype=tf.float32))
# tf.eye
print(tf.eye(4,dtype=tf.float32))
# tf.ones_like
print(tf.ones_like(tf.linspace(0.0, 1.0, 4)))
# tf.zeros_like
print(tf.zeros_like(tf.linspace(0.0, 1.0, 4)))
# tf.linspace
print(tf.linspace(0.0,1.0, 3))
# tf.fill
print(tf.fill((1,5), 1.0))

tf.Tensor([[1. 1. 1.]], shape=(1, 3), dtype=float32)
tf.Tensor([[0. 0. 0.]], shape=(1, 3), dtype=float32)
tf.Tensor(
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]], shape=(4, 4), dtype=float32)
tf.Tensor([1. 1. 1. 1.], shape=(4,), dtype=float32)
tf.Tensor([0. 0. 0. 0.], shape=(4,), dtype=float32)
tf.Tensor([0.  0.5 1. ], shape=(3,), dtype=float32)
tf.Tensor([[1. 1. 1. 1. 1.]], shape=(1, 5), dtype=float32)


## Tensor merge and split. 
Tensor merge

In [33]:
a = tf.random.normal([1,2,3])
b = tf.random.normal([2,2,3])
c = tf.concat([a,b], axis=0)
print(c)
a = tf.random.normal([2,3])
b = tf.random.normal([2,3])
c = tf.stack([a,b], axis = 0)
print(c)
d = tf.stack([a,b], axis = -1)
print(d)


tf.Tensor(
[[[-1.8005704e+00 -9.7591424e-01  1.1928231e+00]
  [ 2.8865596e-02  1.1421562e+00 -1.9990268e+00]]

 [[ 1.7572212e-03  6.1318332e-01 -1.6612515e+00]
  [ 2.2820819e-02  2.2672336e-01 -5.0935364e-01]]

 [[ 1.3486729e+00 -1.2956600e+00  1.9418643e+00]
  [ 1.4996222e+00 -5.9182322e-01  2.6410298e-02]]], shape=(3, 2, 3), dtype=float32)
tf.Tensor(
[[[-0.0529381  -1.6127988  -0.94847506]
  [-0.6705818   1.0892807  -0.47288036]]

 [[ 1.0355864   0.09805752  0.15576431]
  [-0.1160759   0.91408455 -0.5691967 ]]], shape=(2, 2, 3), dtype=float32)
tf.Tensor(
[[[-0.0529381   1.0355864 ]
  [-1.6127988   0.09805752]
  [-0.94847506  0.15576431]]

 [[-0.6705818  -0.1160759 ]
  [ 1.0892807   0.91408455]
  [-0.47288036 -0.5691967 ]]], shape=(2, 3, 2), dtype=float32)


Tensor split

In [34]:
x = tf.random.normal([3,2,3])
print(x)
result = tf.split(x, axis=0, num_or_size_splits =3)
print(result)

x = tf.random.normal([3,2,3])
print(x)
result = tf.unstack(x,axis = 0)
print(result)

tf.Tensor(
[[[ 0.6467815   0.29698062  1.0185219 ]
  [ 0.4643234  -1.0227649  -0.09599734]]

 [[ 0.23131241  0.3414451  -0.00345733]
  [ 0.7526229  -0.10908841 -0.48728976]]

 [[-0.65192187  0.0567714  -0.14992504]
  [-1.1408972  -0.7262049  -0.539716  ]]], shape=(3, 2, 3), dtype=float32)
[<tf.Tensor: shape=(1, 2, 3), dtype=float32, numpy=
array([[[ 0.6467815 ,  0.29698062,  1.0185219 ],
        [ 0.4643234 , -1.0227649 , -0.09599734]]], dtype=float32)>, <tf.Tensor: shape=(1, 2, 3), dtype=float32, numpy=
array([[[ 0.23131241,  0.3414451 , -0.00345733],
        [ 0.7526229 , -0.10908841, -0.48728976]]], dtype=float32)>, <tf.Tensor: shape=(1, 2, 3), dtype=float32, numpy=
array([[[-0.65192187,  0.0567714 , -0.14992504],
        [-1.1408972 , -0.7262049 , -0.539716  ]]], dtype=float32)>]
tf.Tensor(
[[[-0.52960604 -0.6018969   0.9589493 ]
  [-1.0142052  -1.0414324  -0.33069423]]

 [[-1.3156068  -1.4781188  -0.02937897]
  [ 1.4955163  -0.98119545 -1.0819533 ]]

 [[-1.0677149   0.5031118   0.

## Tensor comparison and sort
Tensor comparison

In [35]:
y = tf.random.uniform([10],dtype=tf.int64,maxval=10)
print(y)
y_hat = tf.random.uniform([10],dtype=tf.int64,maxval=10)
print(y_hat)
result = tf.equal(y_hat,y) 
print(result)
print(y_hat > 5)

tf.Tensor([8 4 7 9 3 0 7 5 7 4], shape=(10,), dtype=int64)
tf.Tensor([1 1 2 5 4 0 1 1 3 9], shape=(10,), dtype=int64)
tf.Tensor([False False False False False  True False False False False], shape=(10,), dtype=bool)
tf.Tensor([False False False False False False False False False  True], shape=(10,), dtype=bool)


Tensor sort and related operations

In [36]:
tensor = tf.random.uniform([10], maxval=10)
print(tensor)
# sort
print(tf.sort(tensor, direction='DESCENDING'))
# indice of sort result
print(tf.argsort(tensor, direction='DESCENDING'))
# top k
print(tf.math.top_k(tensor, 3))
# set all elements greater than 4
print(tf.maximum(tensor, 5))
# set all elements smaller than 4
print(tf.minimum(tensor, 5))
# set all elements greater than 3 smaller than 7
print(tf.clip_by_value(tensor, 3, 7))

tf.Tensor(
[6.8241525 7.5564585 8.524065  1.9827163 8.975982  4.325111  1.4330673
 1.077888  4.009632  6.6039777], shape=(10,), dtype=float32)
tf.Tensor(
[8.975982  8.524065  7.5564585 6.8241525 6.6039777 4.325111  4.009632
 1.9827163 1.4330673 1.077888 ], shape=(10,), dtype=float32)
tf.Tensor([4 2 1 0 9 5 8 3 6 7], shape=(10,), dtype=int32)
TopKV2(values=<tf.Tensor: shape=(3,), dtype=float32, numpy=array([8.975982 , 8.524065 , 7.5564585], dtype=float32)>, indices=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([4, 2, 1], dtype=int32)>)
tf.Tensor(
[6.8241525 7.5564585 8.524065  5.        8.975982  5.        5.
 5.        5.        6.6039777], shape=(10,), dtype=float32)
tf.Tensor(
[5.        5.        5.        1.9827163 5.        4.325111  1.4330673
 1.077888  4.009632  5.       ], shape=(10,), dtype=float32)
tf.Tensor(
[6.8241525 7.        7.        3.        7.        4.325111  3.
 3.        4.009632  6.6039777], shape=(10,), dtype=float32)


Dimension reduction function tf.reduce_*

In [37]:
tensor = tf.constant([[1,2],[3,4]], dtype=tf.float32)
# max in dimension 1
print(tf.reduce_max(tensor, axis=1))
# min in dimension 0  
print(tf.reduce_min(tensor, axis=0)) 
# average, default global dimension
print(tf.reduce_mean(tensor)) 
# index of max value in dimension 0        
print(tf.argmax(tensor, axis=0)) 
# index of min value in dimension 1
print(tf.argmin(tensor, axis=1))      

tf.Tensor([2. 4.], shape=(2,), dtype=float32)
tf.Tensor([1. 2.], shape=(2,), dtype=float32)
tf.Tensor(2.5, shape=(), dtype=float32)
tf.Tensor([1 1], shape=(2,), dtype=int64)
tf.Tensor([0 0], shape=(2,), dtype=int64)


Dimension reduction function tf.reduce_* used in loss computation

In [39]:
y_hat = tf.random.normal([10,5]) 
y = tf.constant([1, 2, 3, 4, 5, 1, 2, 3, 4, 5])
y = tf.one_hot(y,depth=5)
loss = tf.keras.losses.mse(y,y_hat) 
loss_mean = tf.reduce_mean(loss)
print(loss_mean)

loss_max = tf.reduce_max(loss)
loss_min = tf.reduce_min(loss)
idx_max = tf.argmax(loss)
idx_min = tf.argmin(loss)
print(loss)
print(loss_max)
print(loss_min)
print(idx_max)
print(idx_min)


tf.Tensor(1.1923487, shape=(), dtype=float32)
tf.Tensor(
[0.5447431  1.7428942  0.85549915 1.5556564  2.3466215  2.0873663
 0.26326865 2.0251179  0.23885174 0.2634689 ], shape=(10,), dtype=float32)
tf.Tensor(2.3466215, shape=(), dtype=float32)
tf.Tensor(0.23885174, shape=(), dtype=float32)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)


## Mathematical operations

In [None]:
a = tf.constant([[1,2],[3,4]], dtype=tf.float32)
b = tf.constant([[5,6],[7,8]], dtype=tf.float32)
# element-wise add
print('a+b=', a+b)  
# element-wise minus
print('a-b=', a-b)  
# element-wise multipy
print('a*b=', a*b)  
# element-wise divide
print('a/b=', a/b)  
# element-wise 
print('a//b', a//b) # 元素地板除
# element-wise remainder
print('b%a=', b%a) 
# square
print(a**2)
# square
print(tf.pow(a, 2.0))
# square
print(tf.square(a))
# square root
print(tf.sqrt(a))
# exp
print(tf.exp(a))
# log
print(tf.math.log(a))
# maxtrix multipy
print(tf.matmul(a, b))

a+b= tf.Tensor(
[[ 6.  8.]
 [10. 12.]], shape=(2, 2), dtype=float32)
a-b= tf.Tensor(
[[-4. -4.]
 [-4. -4.]], shape=(2, 2), dtype=float32)
a*b= tf.Tensor(
[[ 5. 12.]
 [21. 32.]], shape=(2, 2), dtype=float32)
a/b= tf.Tensor(
[[0.2        0.33333334]
 [0.42857143 0.5       ]], shape=(2, 2), dtype=float32)
a//b tf.Tensor(
[[0. 0.]
 [0. 0.]], shape=(2, 2), dtype=float32)
b%a= tf.Tensor(
[[0. 0.]
 [1. 0.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 1.  4.]
 [ 9. 16.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 1.  4.]
 [ 9. 16.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 1.  4.]
 [ 9. 16.]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[1.        1.4142135]
 [1.7320508 2.       ]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 2.7182817  7.389056 ]
 [20.085537  54.59815  ]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[0.        0.6931472]
 [1.0986123 1.3862944]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[19. 22.]
 [43. 50.]], shape=(2, 2), dtype=float32)


## Neural network-related operations  
### The convolution operation

In [40]:
x = tf.constant( [[
[[1],[2],[3],[4]],
[[4],[3],[2],[1]],
[[5],[6],[7],[8]],
[[8],[7],[6],[5]]
]],
dtype=tf.float32)

x_filter = tf.constant( [
[
[[0.5]],[[1]]
], [
[[0.5]],[[1]]
]
],
dtype=tf.float32)

x_stride = [1,1,1,1] 
x_padding = 'VALID'
# by tf.nn
x_conv = tf.nn.conv2d( input=x, filters=x_filter, strides=x_stride, padding=x_padding)
print(x_conv)
# by tf.keras
x_conv_keras = tf.keras.layers.Conv2D(filters=1, kernel_size=2, padding=x_padding)(x)
print(x_conv_keras)

tf.Tensor(
[[[[ 7.5]
   [ 7.5]
   [ 7.5]]

  [[13.5]
   [13.5]
   [13.5]]

  [[19.5]
   [19.5]
   [19.5]]]], shape=(1, 3, 3, 1), dtype=float32)
tf.Tensor(
[[[[ 1.2286534 ]
   [ 3.120226  ]
   [ 5.011798  ]]

  [[ 2.133686  ]
   [ 0.24211407]
   [-1.6494584 ]]

  [[ 5.3117876 ]
   [ 7.2033596 ]
   [ 9.094933  ]]]], shape=(1, 3, 3, 1), dtype=float32)


### The convolution operation by tf.keras

In [41]:
x = tf.constant( [[
[[1],[2],[3],[4]],
[[4],[3],[2],[1]],
[[5],[6],[7],[8]],
[[8],[7],[6],[5]]
]],
dtype=tf.float32)
x_conv_keras = tf.keras.layers.Conv2D(filters=1, kernel_size=2, padding=x_padding)(x)
print(x_conv_keras)

tf.Tensor(
[[[[-1.9689597 ]
   [-0.376841  ]
   [ 1.2152777 ]]

  [[-2.8352392 ]
   [-4.4273577 ]
   [-6.019477  ]]

  [[-2.894652  ]
   [-1.3025334 ]
   [ 0.28958535]]]], shape=(1, 3, 3, 1), dtype=float32)


### The pooling operation

In [43]:
x = tf.constant( [[
[[1],[2],[3],[4]],
[[4],[3],[2],[1]],
[[5],[6],[7],[8]],
[[8],[7],[6],[5]]
]],
dtype=tf.float32)
x_ksize = [1,2,2,1]
x_stride = [1,2,2,1] 
x_padding = 'VALID'
x_pool = tf.nn.max_pool(input=x, ksize=x_ksize, strides=x_stride, padding=x_padding)
print(x_pool)

tf.Tensor(
[[[[4.]
   [4.]]

  [[8.]
   [8.]]]], shape=(1, 2, 2, 1), dtype=float32)


The pooling operation by tf.keras

In [44]:
x = tf.constant( [[
[[1],[2],[3],[4]],
[[4],[3],[2],[1]],
[[5],[6],[7],[8]],
[[8],[7],[6],[5]]
]],
dtype=tf.float32)
x_pool_keras = tf.keras.layers.MaxPool2D()(x)
print(x_pool_keras)

tf.Tensor(
[[[[4.]
   [4.]]

  [[8.]
   [8.]]]], shape=(1, 2, 2, 1), dtype=float32)


### Defining loss

In [47]:
# Returns half of L2 norm of t given by sum(t**2)/2 
x = tf.constant([[2,4],[6,8]], dtype=tf.float32) 
x_hat = tf.constant([[1,2],[3,4]],dtype=tf.float32) 
# MSE = (1**2 + 2**2 + 3**2 + 4**2)/2 = 15
MSE = tf.nn.l2_loss(x-x_hat)
print(MSE)

tf.Tensor(15.0, shape=(), dtype=float32)


A common loss function used in neural networks to optimize the network

In [48]:
# Calculate the cross_entropy with logits (unnormalized outputs of the last layer)
# instead of outputs leads to better numerical stabilities
y = tf.constant([[1,0],[0,1]], dtype=tf.float32) 
y_hat = tf.constant([[3,1],[2,5]], dtype=tf.float32)
# This function alone doesn’t average the cross entropy losses of all data points,
# You need to do that manually using reduce_mean function
CE = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=y_hat))
print(CE)

tf.Tensor(0.08775769, shape=(), dtype=float32)


Loss by tf.keras

In [54]:
x = tf.constant([[2,4],[6,8]], dtype=tf.float32)
x_hat = tf.constant([[1,2],[3,4]], dtype=tf.float32)
MSE_keras = tf.reduce_sum(tf.keras.losses.MSE(x, x_hat))
print(MSE_keras)

y = tf.constant([[1,0],[0,1]], dtype=tf.float32)
y_hat = tf.constant([[3,1],[2,5]],dtype=tf.float32)
CE_keras = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y, y_hat, from_logits=True))
print(CE_keras)

tf.Tensor(15.0, shape=(), dtype=float32)
tf.Tensor(0.08775769, shape=(), dtype=float32)


### Automatic differentiation and gradientspe

In [59]:
# trainable variables in a model is watched by default
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])
with tf.GradientTape() as tape:
  y = layer(x)
  loss = tf.reduce_mean(y**2)
# Calculate gradients with respect to every trainable variable
grad = tape.gradient(loss, layer.trainable_variables)
print(grad)

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0.       , 1.0097853],
       [0.       , 2.0195706],
       [0.       , 3.029356 ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0.       , 1.0097853], dtype=float32)>]


tf.GradientTape().watch()

In [61]:
x0 = tf.Variable(0.0, trainable=False)
x1 = tf.Variable(10.0)
x2 = tf.constant(4.0)

with tf.GradientTape() as tape:
  tape.watch(x2)
  y0 = tf.math.sin(x0)
  y1 = tf.nn.softplus(x1)
  y2 = tf.pow(x2, 2.0)
  y = y0 + y1 + y2
  y_sum = tf.reduce_sum(y)

grad = tape.gradient(y_sum, {'x0': x0, 'x1': x1, 'x2': x2})

print('dy/dx0:', grad['x0'])
print('dy/dx1:', grad['x1'].numpy())
print('dy/dx2:', grad['x2'])


dy/dx0: None
dy/dx1: 0.9999546
dy/dx2: tf.Tensor(8.0, shape=(), dtype=float32)


Optimization of neural networks

In [60]:
# Optimizers play the role of tuning neural network parameters so that
# their task error is minimal
# For example task error can be the MSE
# for a classification task
opt = tf.keras.optimizers.SGD(learning_rate=0.1)
var = tf.Variable(1.0)
loss = lambda: (var ** 2)/2.0         # d(loss)/d(var1) = var1
opt.minimize(loss, [var]).numpy()
print(var.numpy())

0.9


Optimizer works with tf.GradientTape

In [None]:
var = tf.Variable(1.0)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
with tf.GradientTape() as tape:
  # loss
  loss = (var ** 2)/2.0
grads = tape.gradient(loss, [var])
optimizer.apply_gradients(zip(grads, [var]))
print(var.numpy())

# Implementing our first neural network

In [70]:
import tensorflow as tf
import numpy
import gzip

## Preparing the data

In [71]:
# Params for MNIST
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10

In [72]:
def extract_data(filename, num_images):
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        bytestream.read(16)
        buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
        data = np.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
        data = data / PIXEL_DEPTH
        data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
    return data
def extract_labels(filename, num_images):
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        bytestream.read(8)
        buf = bytestream.read(1 * num_images)
        labels = np.frombuffer(buf, dtype=numpy.uint8).astype(numpy.int64)
    return labels
x_train = extract_data('train-images-idx3-ubyte.gz', 60000)
y_train = extract_labels('train-labels-idx1-ubyte.gz', 60000)
x_test = extract_data('t10k-images-idx3-ubyte.gz', 10000)
y_test = extract_labels('t10k-labels-idx1-ubyte.gz', 10000)
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(10000).batch(32)

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)


Extracting train-images-idx3-ubyte.gz
Extracting train-labels-idx1-ubyte.gz
Extracting t10k-images-idx3-ubyte.gz
Extracting t10k-labels-idx1-ubyte.gz
(60000, 28, 28, 1)
(60000,)
(10000, 28, 28, 1)
(10000,)


Building the neural network model

In [73]:
class MyModel(tf.keras.Model):
  def __init__(self):
    super(MyModel, self).__init__()
    self.conv1 = tf.keras.layers.Conv2D(32, 3, activation='relu')
    self.flatten = tf.keras.layers.Flatten()
    self.d1 = tf.keras.layers.Dense(128, activation='relu')
    self.d2 = tf.keras.layers.Dense(10)

  def call(self, x):
    x = self.conv1(x)
    x = self.flatten(x)
    x = self.d1(x)
    return self.d2(x)

# Create an instance of the model

model = MyModel()

Creating loss and optimizer instance

In [76]:
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

Training and Testing  
It may take a while. Be patient to see the result

In [80]:
accuracy_train = tf.keras.metrics.SparseCategoricalAccuracy()
accuracy_test = tf.keras.metrics.SparseCategoricalAccuracy()

# @tf.function
def train_step(images, labels):
  with tf.GradientTape() as tape:
    predictions = model(images, training=True)
    loss = loss_func(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  accuracy_train(labels, predictions)
  
  

EPOCHS = 5
for epoch in range(EPOCHS):
    accuracy_train.reset_states()
    for images, labels in train_ds:
        with tf.GradientTape() as tape:
            predictions = model(images, training=True)
            loss = loss_func(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        accuracy_train(labels, predictions)

    for images, labels in test_ds:
        predictions = model(images)
        accuracy_test(labels, predictions)

    template = 'Epoch {}, Accuracy_train: {}, Accuracy_train: {}'
    print(template.format(epoch + 1, 
                          accuracy_train.result() * 100, 
                          accuracy_test.result() * 100))

Epoch 1, Accuracy_train: 99.67832946777344, Accuracy_train: 98.47000122070312
Epoch 2, Accuracy_train: 99.80833435058594, Accuracy_train: 98.48999786376953
Epoch 3, Accuracy_train: 99.78166961669922, Accuracy_train: 98.40999603271484
Epoch 4, Accuracy_train: 99.8116683959961, Accuracy_train: 98.4000015258789
Epoch 5, Accuracy_train: 99.87000274658203, Accuracy_train: 98.41200256347656
