TODO：关于二阶梯度求导（自动微分求导）还没有完全弄明白，有待进一步研究。

In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import tensorflow as tf
from tensorflow import keras

# Report the interpreter version, then each library's name and version,
# one value per line.
print(sys.version_info)
for module in (mpl, np, pd, sklearn, tf, keras):
    print(module.__name__, module.__version__, sep="\n")

sys.version_info(major=3, minor=7, micro=5, releaselevel='final', serial=0)
matplotlib
3.1.2
numpy
1.17.4
pandas
0.25.3
sklearn
0.22
tensorflow
2.0.0
tensorflow_core.keras
2.2.4-tf


In [2]:
# Conventional (numerical) differentiation
def f(x):
    """Evaluate the quadratic 3x^2 + 2x - 1 at ``x``."""
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1

def approximate_derivative(f, x, eps=1e-3):
    """Estimate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: callable of one argument.
        x: point at which the derivative is estimated.
        eps: half-width of the symmetric interval around ``x``.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps)
    """
    forward = f(x + eps)
    backward = f(x - eps)
    return (forward - backward) / (2 * eps)

# Analytically f'(x) = 6x + 2, so the estimate at x = 1 should be close to 8.
print(approximate_derivative(f, 1.))

7.999999999999119


In [3]:
# Partial derivatives of a function of several variables
def g(x1, x2):
    """Evaluate (x1 + 5) * x2^2."""
    shifted = x1 + 5
    squared = x2 ** 2
    return shifted * squared

def approximate_gradient(g, x1, x2, eps=1e-3):
    """Estimate both partial derivatives of ``g`` at ``(x1, x2)``.

    Each partial derivative is obtained by holding the other argument fixed
    and passing the resulting one-argument function to
    ``approximate_derivative``.

    Returns:
        Tuple ``(dg/dx1, dg/dx2)``.
    """
    def along_x1(value):
        # g as a function of its first argument only, x2 held fixed
        return g(value, x2)

    def along_x2(value):
        # g as a function of its second argument only, x1 held fixed
        return g(x1, value)

    return (
        approximate_derivative(along_x1, x1, eps),
        approximate_derivative(along_x2, x2, eps),
    )

# Analytically dg/dx1 = x2**2 = 9 and dg/dx2 = 2 * (x1 + 5) * x2 = 42 at (2, 3).
print(approximate_gradient(g, 2., 3.))

(8.999999999993236, 41.999999999994486)


In [4]:
# Differentiate g with tf.GradientTape instead of finite differences
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(z, x1)
print(dz_x1)

# A (non-persistent) tape can only be used once; afterwards it is released automatically.
# The second gradient() call below is expected to raise, and the message is printed.
try:
    dz_x2 = tape.gradient(z, x2)
    print(dz_x2)
except RuntimeError as ex:
    print(ex)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [5]:
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)

# With persistent=True the tape can be queried several times,
# but it has to be deleted manually once it is no longer needed
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)

print(dz_x1)
print(dz_x2)

del tape

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [6]:
# Differentiate with respect to all variables in a single call
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)

with tf.GradientTape() as tape:     # the tape is queried only once, so persistent is not needed
    z = g(x1, x2)

# Passing a list of sources returns a list of gradients in the same order
dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

del tape

[<tf.Tensor: id=89, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=95, shape=(), dtype=float32, numpy=42.0>]


In [7]:
# Gradients with respect to constants
x1 = tf.constant(2.)
x2 = tf.constant(3.)

with tf.GradientTape() as tape:
    # tf.constant tensors are not watched automatically (unlike trainable
    # tf.Variable objects), so register them on the tape explicitly
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

del tape

[<tf.Tensor: id=104, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=110, shape=(), dtype=float32, numpy=42.0>]


In [8]:
# Differentiate several target functions with respect to the same variable
x = tf.Variable(5.0)

with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)    # the result is the sum of the two targets' derivatives (3 + 2 * 5 = 13)

<tf.Tensor: id=133, shape=(), dtype=float32, numpy=13.0>

In [9]:
# Second-order derivatives of g via nested tapes
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)

# The outer tape is queried once per first-order gradient (twice in total), so
# it must be persistent. The inner tape is queried only once and can stay
# non-persistent.
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape() as inner_tape:
        z = g(x1, x2)
    # Computed inside the outer context so that the outer tape records how the
    # first-order gradients were obtained and can differentiate them again.
    inner_grads = inner_tape.gradient(z, [x1, x2])

# outer_grads[i][j] is d(dz/dx_i)/dx_j. With z = (x1 + 5) * x2**2:
#   dz/dx1 = x2**2          -> [None, 2 * x2]
#   dz/dx2 = 2*(x1+5)*x2    -> [2 * x2, 2 * (x1 + 5)]
# The None entry appears because dz/dx1 does not depend on x1 at all; gradient()
# returns None (not 0) for a source the target is not connected to.
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2]) for inner_grad in inner_grads]

print(inner_grads)
print(outer_grads)

# A persistent tape has to be released manually once it is no longer needed
del outer_tape

[<tf.Tensor: id=156, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=162, shape=(), dtype=float32, numpy=42.0>]
[[None, <tf.Tensor: id=170, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=181, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=179, shape=(), dtype=float32, numpy=14.0>]]


In [10]:
# Custom gradient descent
def k(x):
    """Objective for the gradient-descent demo: cos(x).

    A quadratic such as ``3. * x ** 2 + 2. * x - 1`` can be substituted here
    to experiment with a different objective.
    """
    return tf.cos(x)

learning_rate = 0.01
x = tf.Variable(-0.1)

# `learning_rate` is the documented argument name; `lr` is only a deprecated alias
optimizer = keras.optimizers.SGD(learning_rate=learning_rate)

for _ in range(1000):
    # Only one gradient() call is made per step, so a default (non-persistent)
    # tape is enough and nothing has to be deleted manually.
    with tf.GradientTape() as tape:
        z = k(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])  # equivalent to x.assign(x - learning_rate * dz_dx)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-3.139815>
