# Custom training

## Setup

In [None]:
# Python 2/3 compatibility imports; a __future__ import has to be the first
# statement of the cell.
from __future__ import absolute_import, division, print_function, unicode_literals

# Ask for TensorFlow 2.x when the magic is available. Any exception the magic
# raises is deliberately ignored so the cell still runs.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass
import tensorflow as tf
# Fix TensorFlow's global random seed; the tf.random.normal calls later in the
# notebook pass no seed of their own.
tf.random.set_seed(1)

## Fit a linear model

Use the concepts you have learned so far—`Tensor`, `Variable`, and `GradientTape`—to build and train a simple model.

Create a simple model that is linear in its parameters, `f(x) = x * x * x * W2 + x * W1 + b`, which has three variables: `W2` and `W1` (weights) and `b` (bias).

### Define the model



In [None]:
class Model(object):
  """Model computing f(x) = W2 * x * x * x + W1 * x + b.

  The three trainable variables start at W2 = 2.0, W1 = 5.0 and b = 0.0.
  """

  def __init__(self):
    # Cubic weight, linear weight and bias, created in that order.
    self.W2, self.W1, self.b = tf.Variable(2.0), tf.Variable(5.0), tf.Variable(0.0)

  def __call__(self, x):
    """Evaluate the model at `x` (the notebook passes a float or a tensor)."""
    cubic_term = self.W2 * x * x * x
    linear_term = self.W1 * x
    return cubic_term + linear_term + self.b

### What will be the output of the model before training if we give x = 2.0 as input?

In [None]:
# Build a model with its initial parameters (W2 = 2.0, W1 = 5.0, b = 0.0).
model = Model()
 
# Evaluate at x = 2.0; with the initial parameters this is
# 2.0 * 2.0**3 + 5.0 * 2.0 + 0.0 = 26.0.
model(2.0)

### Define a loss function

Use the standard L2 loss:

In [None]:
def loss(predicted_y, target_y):
  """Return the mean of the squared differences between predictions and targets."""
  residual = predicted_y - target_y
  squared_error = tf.square(residual)
  return tf.reduce_mean(squared_error)

### Obtain training data

First, synthesize the training data by evaluating the true function on random inputs and adding random Gaussian (Normal) noise to the outputs:

In [None]:
# Ground-truth parameters of the data-generating function: a pure cubic
# (W2 = 1.0) with no linear term and no bias.
TRUE_W2, TRUE_W1, TRUE_b = 1.0, 0.0, 0.0
NUM_EXAMPLES = 1000

# Draw the inputs first and the noise second; both are standard-normal
# samples of length NUM_EXAMPLES.
inputs = tf.random.normal(shape=[NUM_EXAMPLES])
noise = tf.random.normal(shape=[NUM_EXAMPLES])

# Noisy targets: the true function evaluated at the inputs, plus the noise.
outputs = (
    inputs * inputs * inputs * TRUE_W2
    + inputs * TRUE_W1
    + TRUE_b
    + noise
)

Before training the model, visualize the loss value by plotting the model's predictions in red and the training data in blue:

In [None]:
import matplotlib.pyplot as plt

# Evaluate the model once and reuse the result for both the plot and the
# loss, instead of running the forward pass twice.
predictions = model(inputs)

# Explicit figure/axes interface, with labels and a legend so the figure can
# be read on its own: training data in blue, model predictions in red.
fig, ax = plt.subplots()
ax.scatter(inputs, outputs, c='b', label='Training data')
ax.scatter(inputs, predictions, c='r', label='Model predictions')
ax.set_xlabel('inputs')
ax.set_ylabel('outputs')
ax.set_title('Model predictions vs. training data')
ax.legend()
plt.show()

print('Current loss: %1.6f' % loss(predictions, outputs).numpy())

### Define a training loop


In [None]:
def train(model, inputs, outputs, learning_rate):
  """Apply one gradient-descent update to the model's W1, W2 and b.

  The loss of `model(inputs)` against `outputs` is recorded on a gradient
  tape, and each variable is decreased in place by `learning_rate` times its
  gradient. Nothing is returned.
  """
  params = [model.W1, model.W2, model.b]
  with tf.GradientTape() as tape:
    step_loss = loss(model(inputs), outputs)
  grads = tape.gradient(step_loss, params)
  # Update in the same order as the parameter list: W1, W2, then b.
  for param, grad in zip(params, grads):
    param.assign_sub(learning_rate * grad)

In [None]:
# Start again from freshly initialized parameters.
model = Model()

# Learning rate 0.01, epochs numbered 0 through 30. Each printed row reports
# the parameters and loss recorded *before* that epoch's update, so the row
# for epoch 30 reflects 30 completed updates.
W1s, W2s, bs = [], [], []
epochs = range(31)
for epoch in epochs:
  # Append the current parameter values to their history lists.
  for history, variable in ((W1s, model.W1), (W2s, model.W2), (bs, model.b)):
    history.append(variable.numpy())
  current_loss = loss(model(inputs), outputs)

  train(model, inputs, outputs, learning_rate=0.01)

  row = (epoch, W1s[-1], W2s[-1], bs[-1], current_loss)
  print('Epoch %2d: W1=%1.2f W2=%1.2f, b=%1.2f, loss=%2.5f' % row)


What is the range of the loss after the 30th epoch if the learning rate is 0.001?

In [None]:
# Start again from freshly initialized parameters.
model = Model()

# Learning rate 0.001, epochs numbered 0 through 30. Each printed row reports
# the parameters and loss recorded *before* that epoch's update, so the row
# for epoch 30 reflects 30 completed updates.
W1s, W2s, bs = [], [], []
epochs = range(31)
for epoch in epochs:
  # Append the current parameter values to their history lists.
  for history, variable in ((W1s, model.W1), (W2s, model.W2), (bs, model.b)):
    history.append(variable.numpy())
  current_loss = loss(model(inputs), outputs)

  train(model, inputs, outputs, learning_rate=0.001)

  row = (epoch, W1s[-1], W2s[-1], bs[-1], current_loss)
  print('Epoch %2d: W1=%1.2f W2=%1.2f, b=%1.2f, loss=%2.5f' % row)


What will be the value of `z + w` if we execute the following code?

In [None]:
# Differentiate a chain of expressions in x, evaluated at x = 2.0.
x = tf.constant(2.0)
# persistent=True allows gradient() to be called more than once on the same
# tape; it is called three times below.
with tf.GradientTape(persistent=True) as t:
  # x is a constant rather than a Variable, so it is watched explicitly.
  t.watch(x)
  # y = x^3 = 8
  y = x * x * x
  # z = y^2 = x^6 = 64
  z = y * y
  # w = x^6 + 2x^3 + x = 64 + 16 + 2 = 82
  w = z + 2*y + x
# dw/dx = 6x^5 + 6x^2 + 1 = 217
dw_dx = t.gradient(w, x)
# dz/dx = 6x^5 = 192
dz_dx = t.gradient(z, x)
# dy/dx = 3x^2 = 12
dy_dx = t.gradient(y, x)

In [30]:
# With x = 2.0, z = 64.0 and w = 82.0, so this displays 146.0.
z+w

<tf.Tensor: shape=(), dtype=float32, numpy=146.0>

In [32]:
# dw/dx = 6x^5 + 6x^2 + 1, which is 217.0 at x = 2.0.
dw_dx

<tf.Tensor: shape=(), dtype=float32, numpy=217.0>