In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

### 1. 手动求导

**一元函数求导**

In [2]:
def f(x):
    """Evaluate the quadratic 3*x**2 + 2*x - 1 at x."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1

def approximate_derivative(f, x, eps=1e-3):
    """Estimate the derivative of f at x with a central difference.

    Args:
        f: Callable taking a single argument.
        x: Point at which the derivative is estimated.
        eps: Half-width of the symmetric step taken around x.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps).
    """
    ahead = f(x + eps)
    behind = f(x - eps)
    return (ahead - behind) / (2. * eps)

In [3]:
# Central-difference estimate of f'(1); analytically f'(x) = 6x + 2, so the exact value is 8.
slope_at_one = approximate_derivative(f, 1.)
print(slope_at_one)

7.999999999999119


**多元函数求导**

In [4]:
def g(x1, x2):
    """Return (x1 + 5) * x2 ** 2."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared

def approximate_gredient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of a two-argument function at (x1, x2).

    Each partial is obtained by holding the other argument fixed and handing
    the resulting one-argument function to approximate_derivative.

    Args:
        g: Callable taking two positional arguments.
        x1: Value of the first argument at the evaluation point.
        x2: Value of the second argument at the evaluation point.
        eps: Step forwarded to approximate_derivative.

    Returns:
        Tuple (dg/dx1, dg/dx2).
    """
    def along_x1(value):
        # Vary the first argument only; x2 stays fixed.
        return g(value, x2)

    def along_x2(value):
        # Vary the second argument only; x1 stays fixed.
        return g(x1, value)

    partial_x1 = approximate_derivative(along_x1, x1, eps)
    partial_x2 = approximate_derivative(along_x2, x2, eps)
    return partial_x1, partial_x2

# Numerical gradient of g at (2, 3); analytically (x2**2, 2 * (x1 + 5) * x2) = (9, 42).
numeric_grad = approximate_gredient(g, 2., 3.)
print(numeric_grad)

(8.999999999993236, 41.999999999994486)


### 2. 自动求导

In [6]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)  # forward computation recorded by the tape

# First gradient() call on the default (non-persistent) tape.
dz_x1 = tape.gradient(z, x1)
print(dz_x1)

# A second gradient() call on the same non-persistent tape is expected to
# raise RuntimeError; catch it and show the message instead of a traceback.
try:
    dz_x2 = tape.gradient(z, x2)
except RuntimeError as err:
    print(err)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [7]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
# persistent=True allows gradient() to be called more than once on this tape.
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)  # forward computation recorded by the tape

dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2, sep="\n")


tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [8]:
# Drop the name bound to the GradientTape so the tape object can be garbage-collected.
del tape

In [9]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

# Passing a list of sources returns both partial derivatives from one
# gradient() call, so a non-persistent tape is enough here.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=118, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=124, shape=(), dtype=float32, numpy=42.0>]


**对常量求偏导**

In [10]:
x1, x2 = tf.constant(2.0), tf.constant(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

# The inputs are constants and were never watched by the tape, so the
# gradient with respect to each of them comes back as None.
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[None, None]


In [11]:
x1, x2 = tf.constant(2.0), tf.constant(3.0)
with tf.GradientTape() as tape:
    # Constants have to be watched explicitly before the tape records
    # operations on them.
    for source in (x1, x2):
        tape.watch(source)
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=140, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=146, shape=(), dtype=float32, numpy=42.0>]


**两个函数对一个变量求导**

In [12]:
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1, z2 = 3 * x, x ** 2
# With a list of targets the per-target gradients are summed:
# d(3x)/dx + d(x^2)/dx = 3 + 2 * 5 = 13.
tape.gradient([z1, z2], x)

<tf.Tensor: id=169, shape=(), dtype=float32, numpy=13.0>

**二阶导数**

In [13]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # The first-order gradients are computed inside outer_tape's context so
    # that outer_tape records them and can differentiate them again.
    inner_grads = inner_tape.gradient(z, [x1, x2])

# Differentiate each first-order partial with respect to both variables.
# outer_tape is queried once per partial, which is why it is persistent.
outer_grads = []
for inner_grad in inner_grads:
    outer_grads.append(outer_tape.gradient(inner_grad, [x1, x2]))
print(outer_grads)
del inner_tape, outer_tape

[[None, <tf.Tensor: id=206, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=217, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=215, shape=(), dtype=float32, numpy=14.0>]]


### 3. 梯度下降

In [14]:
learning_rate = 0.1
x = tf.Variable(0.0)

# Plain gradient descent on f: 100 iterations starting from x = 0.
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # In-place update of the variable: x <- x - learning_rate * df/dx.
    step = learning_rate * dz_dx
    x.assign_sub(step)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


In [15]:
learning_rate = 0.1
x = tf.Variable(0.0)

# Same loop as manual gradient descent, with the update delegated to a Keras SGD optimizer.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients takes (gradient, variable) pairs.
    grads_and_vars = [(dz_dx, x)]
    optimizer.apply_gradients(grads_and_vars)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
