In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
from sklearn.datasets import fetch_california_housing

# Record the interpreter and library versions this notebook was run with.
print(tf.__version__)
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(f"{module.__name__} {module.__version__}")

2.0.0
sys.version_info(major=3, minor=6, micro=8, releaselevel='final', serial=0)
matplotlib 3.1.1
numpy 1.17.2
pandas 0.25.3
sklearn 0.21.3
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


In [2]:
def f(x):
    """Evaluate the quadratic 3x^2 + 2x + 5 at ``x``."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term + 5

def approximate_derivative(f, x, eps=1e-3):
    """Estimate the derivative of ``f`` at ``x`` numerically.

    Uses the symmetric difference quotient
    ``(f(x + eps) - f(x - eps)) / (2 * eps)``.

    Args:
        f: Callable of one argument.
        x: Point at which the derivative is estimated.
        eps: Offset applied on each side of ``x``.

    Returns:
        The estimated slope of ``f`` at ``x``.
    """
    ahead = f(x + eps)
    behind = f(x - eps)
    return (ahead - behind) / (2. * eps)

# f'(x) = 6x + 2, so the numerical estimate at x = 0 should be close to 2.
print(approximate_derivative(f, x=0))

2.000000000000224


In [3]:
def g(x1, x2):
    """Return ``(x1 + 5) * x2 ** 2``."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared

def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of ``g`` at ``(x1, x2)``.

    Each partial is obtained by holding the other argument fixed and passing
    the resulting one-argument function to ``approximate_derivative``.

    Args:
        g: Callable of two arguments.
        x1: First coordinate of the evaluation point.
        x2: Second coordinate of the evaluation point.
        eps: Offset forwarded to ``approximate_derivative``.

    Returns:
        A tuple ``(dg_x1, dg_x2)`` of the two estimated partial derivatives.
    """
    def along_x1(value):
        # Vary the first argument only; x2 stays fixed.
        return g(value, x2)

    def along_x2(value):
        # Vary the second argument only; x1 stays fixed.
        return g(x1, value)

    return (
        approximate_derivative(along_x1, x1, eps),
        approximate_derivative(along_x2, x2, eps),
    )

# Analytically dg/dx1 = x2 ** 2 = 9 and dg/dx2 = 2 * x2 * (x1 + 5) = 42 at (2, 3).
print(approximate_gradient(g, x1=2., x2=3.))

(8.999999999993236, 41.999999999994486)


In [4]:
# Differentiate z = g(x1, x2) at (2, 3) with a gradient tape.
# Note: the inputs are tf.Variable objects, not constants.
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(z, x1)
print(dz_x1)

# gradient() can be called only once on a non-persistent tape: its resources
# are released after the first call, so this second call raises RuntimeError.
try:
    dz_x2 = tape.gradient(z, x2)
except RuntimeError as err:
    print(err)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [5]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)  # note: Variables, not constants

# persistent=True keeps the tape usable after gradient() has been called, so
# it can be queried more than once; its resources are not released
# automatically.
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

dz_x1, dz_x2 = [tape.gradient(z, var) for var in (x1, x2)]
print(dz_x1, dz_x2)

# Drop the reference explicitly once the persistent tape is no longer needed.
del tape

tf.Tensor(9.0, shape=(), dtype=float32) tf.Tensor(42.0, shape=(), dtype=float32)


In [6]:
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)  # note: Variables, not constants
with tf.GradientTape() as tape:
    z = g(x1, x2)

# Passing a list of sources differentiates with respect to all of them in a
# single gradient() call; the result is a list in the same order.
dz_x1x2 = tape.gradient(z, [x1, x2])

print(dz_x1x2)

[<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]


In [7]:
# Same computation, but with constants instead of Variables.
x1, x2 = tf.constant(2.0), tf.constant(3.0)

# The tape cannot differentiate with respect to constants directly, so both
# gradients come back as None.
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])  # several sources in one call

print(dz_x1x2)

[None, None]


In [8]:
# Constants again, this time registered with the tape explicitly.
x1, x2 = tf.constant(2.0), tf.constant(3.0)
with tf.GradientTape() as tape:
    # The tape has to be told to watch constants before they are used.
    tape.watch([x1, x2])
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])  # several sources in one call

print(dz_x1x2)

[<tf.Tensor: id=111, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=117, shape=(), dtype=float32, numpy=42.0>]


In [9]:
# Differentiate two target functions with respect to a single variable.
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1, z2 = 3 * x, x ** 2

# With a list of targets the result is the sum of the individual gradients:
# d(3x)/dx + d(x^2)/dx = 3 + 2 * 5 = 13.
tape.gradient([z1, z2], x)

<tf.Tensor: id=140, shape=(), dtype=float32, numpy=13.0>

In [10]:
# Second-order derivatives via nested tapes.
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # The first-order gradients are computed inside the outer tape's context
    # so the outer tape records them and can differentiate them again.
    inner_grads = inner_tape.gradient(z, [x1, x2])

# outer_tape.gradient() is called once per first-order gradient, which is why
# the outer tape has to be persistent.
outer_grads = [
    outer_tape.gradient(first_order, [x1, x2])
    for first_order in inner_grads
]
print(outer_grads)
del inner_tape, outer_tape

[[None, <tf.Tensor: id=177, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=188, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=186, shape=(), dtype=float32, numpy=14.0>]]


In [11]:
# Second-order derivatives again, this time also showing the first-order ones.
x1, x2 = tf.Variable(2.0), tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    # Computed inside the outer tape's context so the outer tape can
    # differentiate the first-order gradients a second time.
    inner_grads = inner_tape.gradient(z, [x1, x2])

# One row per first-order gradient, each differentiated w.r.t. x1 and x2.
# The x1/x1 entry is None: dz/dx1 = x2 ** 2 does not depend on x1.
outer_grads = [
    outer_tape.gradient(first_order, [x1, x2])
    for first_order in inner_grads
]
print(inner_grads)
print(outer_grads)
del inner_tape, outer_tape

[<tf.Tensor: id=211, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=217, shape=(), dtype=float32, numpy=42.0>]
[[None, <tf.Tensor: id=225, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=236, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=234, shape=(), dtype=float32, numpy=14.0>]]


In [12]:
# A minimal hand-written gradient-descent loop that minimises f.
learning_rate = 0.01
x = tf.Variable(0.0)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # In-place update: x <- x - learning_rate * dz/dx.
    step = learning_rate * dz_dx
    x.assign_sub(step)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.33264837>


In [13]:
# Same minimisation of f, with the parameter update delegated to a Keras
# optimizer instead of a manual assign_sub.
learning_rate = 0.01
x = tf.Variable(0.0)

# Use the `learning_rate` keyword: `lr` is only a deprecated alias and is
# rejected by newer Keras releases.
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    # apply_gradients takes an iterable of (gradient, variable) pairs.
    optimizer.apply_gradients([(dz_dx, x)])
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.33264837>
