In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import shutil
import sys
import time
import pprint
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.preprocessing import StandardScaler

from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__) 

2.1.0
sys.version_info(major=3, minor=6, micro=5, releaselevel='final', serial=0)
matplotlib 2.2.2
numpy 1.18.1
pandas 0.25.3
sklearn 0.19.1
tensorflow 2.1.0
tensorflow_core.python.keras.api._v2.keras 2.2.4-tf


In [7]:
def f(x):
    return 3.*x**2 + 2.*x - 1

def approximate_derivative(f,x,eps=1e-3):
    return (f(x+eps) - f(x-eps))/(2.*eps)

In [4]:
approximate_derivative(f,1,1e-4)

7.999999999994678

In [25]:
def g(x1,x2):
    return (x1+5)*(x2**2)

def approximate_gradient(g,x1,x2,eps=1e-3):
    dg_x1 = approximate_derivative(lambda x: g(x,x2), x1, eps)    
    dg_x2 = approximate_derivative(lambda x: g(x1,x), x2, eps)
    return dg_x1, dg_x2


approximate_derivative(g,2.0,3.0)

TypeError: g() missing 1 required positional argument: 'x2'

In [23]:
approximate_derivative(lambda x:g(2,x), 3,1e-3)

41.999999999994486

In [None]:
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1,x2)
dz_x1 = tape.gradient(z,x1)
print(dz_x1)

try:
    dz_x2 = tape.gradient(z,x2)
except RuntimeError as ex:
    print(ex)

In [None]:
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1,x2)
dz_x1 = tape.gradient(z,x1)
print(dz_x1)

try:
    dz_x2 = tape.gradient(z,x2)
except RuntimeError as ex:
    print(ex)
print(dz_x2)

del tape

In [27]:
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1,x2)
dz_x1x2 = tape.gradient(z,[x1,x2])
print(dz_x1x2)

[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]


In [29]:
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3*x
    z2 = x ** 2
tape.gradient([z1,z2],x)

<tf.Tensor: shape=(), dtype=float32, numpy=13.0>

In [35]:
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1,x2)
    inner_grads = inner_tape.gradient(z,[x1,x2])
outer_grads = [outer_tape.gradient(inner_grad,[x1,x2]) for inner_grad in inner_grads]

print(outer_grads)
del inner_grads
del outer_grads

[[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]


In [37]:
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z,x)
    x.assign_sub(learning_rate*dz_dx)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


In [38]:
#GradientTape 和 optimizer 配合使用

learning_rate = 0.1
x = tf.Variable(0.0)
optimizer = keras.optimizers.SGD(lr = learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z,x)
    optimizer.apply_gradients([(dz_dx,x)])
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
