In [1]:
import os
import sys
import time
import numpy as np
import pandas as pd
import sklearn
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf
from tensorflow import keras

print(sys.version_info)
for module in mpl,np,pd,sklearn,tf,keras:
    print(module.__name__,module.__version__)

sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.1.1
numpy 1.16.4
pandas 0.24.2
sklearn 0.21.2
tensorflow 2.1.0
tensorflow_core.python.keras.api._v2.keras 2.2.4-tf


In [2]:
# tf.function and auto-graph
def scaled_elu(z,scale=1.0,alpha=1.0):
    #z > 0 ? scale * z: scale * alpha * tf.nn.elu(z)
    is_positive = tf.greater_equal(z,0.0)
    return scale * tf.where(is_positive,z,alpha * tf.nn.elu(z))
print(scaled_elu(tf.constant(-3.)))
print(scaled_elu(tf.constant([-3.,-2.5])))

scaled_elu_tf = tf.function(scaled_elu)
print(scaled_elu_tf(tf.constant(-3.)))
print(scaled_elu_tf(tf.constant([-3.,-2.5])))

tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915  ], shape=(2,), dtype=float32)
tf.Tensor(-0.95021296, shape=(), dtype=float32)
tf.Tensor([-0.95021296 -0.917915  ], shape=(2,), dtype=float32)


In [3]:
print(scaled_elu_tf.python_function is scaled_elu)

True


In [4]:
%timeit scaled_elu(tf.random.normal([1000,1000]))
%timeit scaled_elu_tf(tf.random.normal([1000,1000]))

103 ms ± 6.71 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
84.5 ms ± 5.88 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [5]:
# 计算1 + 1/2 + 1/2^2+ ...+ 1/2^n
@tf.function
def converge_to_2(n_iters):
    total = tf.constant(0.)
    increment = tf.constant(1.)
    for _ in range(n_iters):
        total += increment
        increment /= 2.0
    return total
print(converge_to_2(20))

tf.Tensor(1.9999981, shape=(), dtype=float32)


In [6]:
#使用tf中的autograph 将python 函数转化为 tf函数 展示
def display_tf_code(func):
    code = tf.autograph.to_code(func)
    from IPython.display import display,Markdown
    display(Markdown("'''python\n{}\n'''".format(code)))

In [7]:
display_tf_code(scaled_elu)

'''python
def tf__scaled_elu(z, scale=None, alpha=None):
  do_return = False
  retval_ = ag__.UndefinedReturnValue()
  with ag__.FunctionScope('scaled_elu', 'fscope', ag__.ConversionOptions(recursive=True, user_requested=True, optional_features=(), internal_convert_user_code=True)) as fscope:
    is_positive = ag__.converted_call(tf.greater_equal, (z, 0.0), None, fscope)
    do_return = True
    retval_ = fscope.mark_return_value(scale * ag__.converted_call(tf.where, (is_positive, z, alpha * ag__.converted_call(tf.nn.elu, (z,), None, fscope)), None, fscope))
  do_return,
  return ag__.retval(retval_)

'''

In [8]:
display_tf_code(converge_to_2)

ConversionError: converting <tensorflow.python.eager.def_function.Function object at 0x0000000015FC4978>: ValueError: Cannot apply autograph to a function that doesn't expose a __code__ object. If this is a @tf.function, try passing f.python_function instead.

In [9]:
#变量创建需要放在函数外面
var = tf.Variable(0.)
@tf.function
def add_21():
    return var.assign_add(21)
print(add_21())

tf.Tensor(21.0, shape=(), dtype=float32)


In [10]:
#函数签名，限定参数类型
@tf.function(input_signature=[tf.TensorSpec([None],tf.int32,name='x')])
def cube(z):
    return tf.pow(z,3)
try:
    print(cube(tf.constant([1.,2.,3.])))
except ValueError as ex:
    print(ex)

print(cube(tf.constant([1,2,3])))

Python inputs incompatible with input_signature:
  inputs: (
    tf.Tensor([1. 2. 3.], shape=(3,), dtype=float32))
  input_signature: (
    TensorSpec(shape=(None,), dtype=tf.int32, name='x'))
tf.Tensor([ 1  8 27], shape=(3,), dtype=int32)


In [11]:
# @tf.function 将 py func -> tf graph
# 可以使用get_concrete_function 通过 add input singnature 便于 SaveModel
cube_func_int32 = cube.get_concrete_function(tf.TensorSpec([None],tf.int32))
print(cube_func_int32)

<tensorflow.python.eager.function.ConcreteFunction object at 0x0000000016324160>


In [15]:
print(cube_func_int32 is cube.get_concrete_function(tf.TensorSpec([5],tf.int32)))
print(cube_func_int32 is cube.get_concrete_function(tf.constant([1,2,3])))

True
True


In [16]:
cube_func_int32.graph

<tensorflow.python.framework.func_graph.FuncGraph at 0x162f14e0>

In [17]:
cube_func_int32.graph.get_operations()

[<tf.Operation 'x' type=Placeholder>,
 <tf.Operation 'Pow/y' type=Const>,
 <tf.Operation 'Pow' type=Pow>,
 <tf.Operation 'Identity' type=Identity>]

In [19]:
pow_op = cube_func_int32.graph.get_operations()[2]
print(pow_op)

name: "Pow"
op: "Pow"
input: "x"
input: "Pow/y"
attr {
  key: "T"
  value {
    type: DT_INT32
  }
}



In [20]:
print(list(pow_op.inputs))
print(list(pow_op.outputs))

[<tf.Tensor 'x:0' shape=(None,) dtype=int32>, <tf.Tensor 'Pow/y:0' shape=() dtype=int32>]
[<tf.Tensor 'Pow:0' shape=(None,) dtype=int32>]


In [21]:
cube_func_int32.graph.as_graph_def()

node {
  name: "x"
  op: "Placeholder"
  attr {
    key: "_user_specified_name"
    value {
      s: "x"
    }
  }
  attr {
    key: "dtype"
    value {
      type: DT_INT32
    }
  }
  attr {
    key: "shape"
    value {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
node {
  name: "Pow/y"
  op: "Const"
  attr {
    key: "dtype"
    value {
      type: DT_INT32
    }
  }
  attr {
    key: "value"
    value {
      tensor {
        dtype: DT_INT32
        tensor_shape {
        }
        int_val: 3
      }
    }
  }
}
node {
  name: "Pow"
  op: "Pow"
  input: "x"
  input: "Pow/y"
  attr {
    key: "T"
    value {
      type: DT_INT32
    }
  }
}
node {
  name: "Identity"
  op: "Identity"
  input: "Pow"
  attr {
    key: "T"
    value {
      type: DT_INT32
    }
  }
}
versions {
  producer: 175
}

In [23]:
cube_func_int32.graph.get_operation_by_name("x")

<tf.Operation 'x' type=Placeholder>

In [24]:
cube_func_int32.graph.get_tensor_by_name("x:0")

<tf.Tensor 'x:0' shape=(None,) dtype=int32>

In [31]:
#自定义求导
def f(x):
    return 3.* x ** 2 + 2 * x - 1

def approximae_derivative(f,x,eps = 1e-3):
    return (f(x+eps) - f(x-eps)) / (2*eps)
print(approximae_derivative(f,1.))

7.999999999999119


In [32]:
#多元求导
def g(x1,x2):
    return (x1+5)*(x2**2)
def approximate_gradient(g,x1,x2,eps=1e-3):
    #对x1求偏导
    dg_x1 = approximae_derivative(lambda x: g(x,x2),x1,eps)
    #对x2求偏导
    dg_x2 = approximae_derivative(lambda x: g(x1,x),x2,eps)
    return dg_x1,dg_x2

print(approximate_gradient(g,2.,3.))

(8.999999999993236, 41.999999999994486)


In [33]:
#使用tf api 求导
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1,x2)
dz_x1 = tape.gradient(z,x1)
print(dz_x1)
#tf.GradientTape()参数persistent需要设置为True ,否则with上下文使用一次会自动关闭tape
try:
    dz_x2 = tape.gradient(z,x2)
except RuntimeError as ex:
    print(ex)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [35]:
#使用tf api 求导
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1,x2)
dz_x1 = tape.gradient(z,x1)
print(dz_x1)
#tf.GradientTape()参数persistent需要设置为True ,否则with上下文使用一次会自动关闭tape,并且使用完后需要自己手动del删除关闭
try:
    dz_x2 = tape.gradient(z,x2)
    print(dz_x2)
    del tape
except RuntimeError as ex:
    print(ex)

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [36]:
#使用tf api 求导，或者我们可以传入求偏导自变量数组，一次求就不必设置persistent为True
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1,x2)
dz_x1x2 = tape.gradient(z,[x1,x2])
print(dz_x1x2)

[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]


In [38]:
#使用tf api 求导，对于常量，默认无法求导，需要tape.watch来关注常量方可
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch([x1,x2])
    z = g(x1,x2)
dz_x1x2 = tape.gradient(z,[x1,x2])
print(dz_x1x2)

[<tf.Tensor: shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: shape=(), dtype=float32, numpy=42.0>]


In [39]:
#多个函数对同一变量求导的和
x = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z1 = 3 * x 
    z2 = x ** 2
tape.gradient([z1,z2],x)

<tf.Tensor: shape=(), dtype=float32, numpy=13.0>

In [50]:
#求解二阶导数
#使用tf api 求导，对于常量，默认无法求导，需要tape.watch来关注常量方可
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
def t(x1,x2):
    return (x1+5)*(x2**2)
with tf.GradientTape(persistent=True) as second_tape:
    with tf.GradientTape(persistent=True) as first_tape:
        z = t(x1,x2)
    first_grads = first_tape.gradient(z,[x1,x2])
second_grads = [second_tape.gradient(first_grad,[x1,x2]) for first_grad in first_grads]
print(second_grads)

del first_tape
del second_tape

[[None, <tf.Tensor: shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: shape=(), dtype=float32, numpy=14.0>]]


In [62]:
#模拟带学习率的梯度下降
def k(x):
    return 3.* x ** 2 + 2. * x - 1.
learning_rate = 1e-2
x = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = k(x)
    dz_dx = tape.gradient(z,x)
    x.assign_sub(learning_rate * dz_dx)
print(x)

#使用keras 模拟
optimizer = tf.keras.optimizers.SGD(lr = learning_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = k(x)
    dz_dx = tape.gradient(z,x)
    optimizer.apply_gradients([(dz_dx,x)])
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.33264837>
<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.33333194>
