In [1]:
from operations import FullyConnected, ReLU, Sigmoid, SquaredError, Sum, Multiply

In [2]:
import numpy as np

In [3]:
# Fix NumPy's global RNG seed so repeated Restart & Run All executions see the same random stream.
# NOTE(review): presumably FullyConnected draws its initial weights from np.random -- confirm in operations.py.
np.random.seed(42)

In [4]:
# Input batch: the values 2 and 5 laid out as a 2x1 integer column (one row per sample).
X = np.array([2, 5]).reshape(-1, 1)

In [5]:
X

array([[2],
       [5]])

#### y = sin(x) + 0.3

In [6]:
# Regression targets: element-wise sin of the inputs shifted by 0.3 (same 2x1 shape as X).
y = np.sin(X) + 0.3

In [7]:
y

array([[ 1.20929743],
       [-0.65892427]])

# Forward

In [8]:
# First fully connected layer. The parameters displayed in the next cell show
# w with shape 1x3 and b as 1x3 zeros, i.e. 1 input feature mapped to 3 units.
layer1 = FullyConnected(1, 3)

In [9]:
layer1.w, layer1.b

(array([[ 0.49671415, -0.1382643 ,  0.64768854]]), array([[0., 0., 0.]]))

In [10]:
# Forward pass through layer 1: the 2x1 input becomes a 2x3 pre-activation
# (the displayed values equal X @ layer1.w + layer1.b).
layer1_output = layer1.forward(X)

In [11]:
layer1_output

array([[ 0.99342831, -0.2765286 ,  1.29537708],
       [ 2.48357077, -0.69132151,  3.23844269]])

In [12]:
# Activation op that sits between layer 1 and layer 2. The same instance is
# reused for the backward call later in the notebook.
relu_1 = ReLU()

In [13]:
# Apply ReLU to the layer-1 pre-activations; in the displayed result the
# all-negative middle column is replaced by zeros, the other columns pass through.
relu_1_output = relu_1.forward(layer1_output)

In [14]:
relu_1_output

array([[0.99342831, 0.        , 1.29537708],
       [2.48357077, 0.        , 3.23844269]])

In [15]:
# Output layer. The parameters displayed in the next cell show w with shape 3x1
# and b as a 1x1 zero, i.e. 3 hidden units mapped to 1 prediction.
layer2 = FullyConnected(3, 1)

In [16]:
layer2.w, layer2.b

(array([[ 0.8793217 ],
        [-0.13518851],
        [-0.13517904]]), array([[0.]]))

In [17]:
# Forward pass through layer 2: the 2x3 activations become 2x1 predictions.
layer2_output = layer2.forward(relu_1_output)

In [18]:
layer2_output

array([[0.69843524],
       [1.7460881 ]])

In [19]:
# Loss op instance; kept in a variable because its backward() is called on the same object later.
squared_error = SquaredError()

In [20]:
# Per-sample squared error. Arguments are passed as (targets, predictions);
# the displayed 2x1 result equals (y - layer2_output) ** 2 element-wise.
squared_error_output = squared_error.forward(y, layer2_output)

In [21]:
squared_error_output

array([[0.26098017],
       [5.78408454]])

In [22]:
# Reduction op instance; reused for the backward call later in the notebook.
sum_op = Sum()

In [23]:
# Collapse the per-sample errors into one scalar (the displayed value is the sum of both rows).
sum_output = sum_op.forward(squared_error_output)

In [24]:
sum_output

6.045064709380343

In [25]:
# Scalar multiplication op instance; reused for the first backward call.
mul = Multiply()

In [26]:
# Final loss: 1/2 * (sum of squared errors). The inputs are passed as
# (constant 0.5, sum_output); mul.backward later returns gradients in that same order.
mul_output = mul.forward(1/2., sum_output)

In [27]:
mul_output

3.0225323546901715

# Backward

In [28]:
# Seed for backpropagation: the gradient of the loss with respect to itself is 1.
# upstream_gradients is then overwritten by every backward cell below, so those
# cells must be run in order, once per forward pass.
upstream_gradients = 1

In [29]:
# Backprop through the multiply op. The displayed result is a 2-tuple with one
# gradient per forward input, in the order of mul.forward(1/2., sum_output):
# (gradient w.r.t. the 0.5 constant = sum_output, gradient w.r.t. sum_output = 0.5).
upstream_gradients = mul.backward(upstream_gradients)

In [30]:
upstream_gradients

(6.045064709380343, 0.5)

In [31]:
# Backprop through the sum. At this point upstream_gradients is still the
# (grad w.r.t. 0.5 constant, grad w.r.t. sum_output) tuple returned by mul.backward,
# so take element 1 instead of re-typing its value (0.5) by hand; this keeps the
# backward pass correct if the scale factor in mul.forward is ever changed.
# The displayed result has one entry per sample, each equal to the incoming gradient.
# Like the other backward cells, this overwrites upstream_gradients and is meant
# to run once per forward pass.
upstream_gradients = sum_op.backward(upstream_gradients[1])

In [32]:
upstream_gradients

array([[0.5],
       [0.5]])

In [33]:
# Backprop through the squared error. The displayed 2x1 result equals
# layer2_output - y (i.e. 0.5 * 2 * (prediction - target)), the gradient with
# respect to the predictions.
upstream_gradients = squared_error.backward(upstream_gradients)

In [34]:
upstream_gradients

array([[-0.51086219],
       [ 2.40501238]])

In [35]:
# Backprop through layer 2. The return value (2x3) is the gradient with respect
# to the layer's input; the parameter gradients are stored on the layer as
# layer2.w_gradients / layer2.b_gradients and inspected two cells below.
upstream_gradients = layer2.backward(upstream_gradients)

In [36]:
upstream_gradients

array([[-0.4492122 ,  0.0690627 ,  0.06905786],
       [ 2.11477957, -0.32513005, -0.32510725]])

In [37]:
layer2.w_gradients, layer2.b_gradients

(array([[5.46551348],
        [0.        ],
        [7.12673559]]), array([[1.89415019]]))

In [38]:
# Backprop through ReLU: in the displayed result the middle column (whose forward
# input was negative) is zeroed, while the other columns pass through unchanged.
upstream_gradients = relu_1.backward(upstream_gradients)

In [39]:
upstream_gradients

array([[-0.4492122 ,  0.        ,  0.06905786],
       [ 2.11477957, -0.        , -0.32510725]])

In [40]:
# Backprop through layer 1. The return value (2x1) is the gradient with respect
# to X; layer1.w_gradients / layer1.b_gradients are populated as a side effect
# and displayed two cells below.
upstream_gradients = layer1.backward(upstream_gradients)

In [41]:
upstream_gradients

array([[-0.17840208],
       [ 0.8398727 ]])

In [42]:
layer1.w_gradients, layer1.b_gradients

(array([[ 9.67547342,  0.        , -1.48742055]]),
 array([[ 1.66556736,  0.        , -0.2560494 ]]))

# 利用 TensorFlow 进行对比验证

In [43]:
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [44]:
# Mirror layer 1 in TensorFlow, initialising the variables from the NumPy
# parameters so both implementations start from identical weights and biases.
w_1 = tf.Variable(layer1.w, name="w_1")
b_1 = tf.Variable(layer1.b, name="b_1")

In [45]:
# Mirror layer 2 in TensorFlow, again starting from the exact NumPy parameter values.
w_2 = tf.Variable(layer2.w, name="w_2")
b_2 = tf.Variable(layer2.b, name="b_2")

In [46]:
# Graph inputs for features and targets: float64 columns with a free batch dimension.
# NOTE(review): tf.placeholder is the TF 1.x graph-mode API; this section assumes a 1.x runtime.
input_x = tf.placeholder(dtype=tf.float64, shape=(None, 1))
input_y = tf.placeholder(dtype=tf.float64, shape=(None, 1))

In [47]:
# Same network as the hand-built forward pass: affine -> ReLU -> affine.
tf_layer_1 = tf.matmul(input_x, w_1) + b_1
tf_relu_1 = tf.nn.relu(tf_layer_1)
tf_layer_2 = tf.matmul(tf_relu_1, w_2) + b_2

In [48]:
# Same objective as the hand-built graph: 1/2 * sum of squared errors.
# A mean over the batch only coincides with that when the batch has exactly two
# rows, so use an explicit half-sum to keep the comparison valid for any batch size.
# axis=0 reduces over the batch dimension and leaves a shape-(1,) tensor.
loss = 0.5 * tf.reduce_sum(tf.square(input_y - tf_layer_2), axis=0)

In [49]:
# The optimizer is only used to build the gradient ops; no update step is ever
# run in this notebook, so learning_rate has no effect on the values compared below.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
# List of (gradient, variable) pairs for the variables the loss depends on.
grads_and_vars = optimizer.compute_gradients(loss)

In [50]:
# Create the session and run the initializer so the variables take the NumPy values given above.
# NOTE(review): the session is never closed in this notebook; consider calling sess.close() once the checks are done.
init_op = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init_op)

In [51]:
sess.run(loss, feed_dict={input_x: X, input_y: y})

array([3.02253235])

In [52]:
# Evaluate every (gradient, variable value) pair on the same batch. In the displayed
# result the pairs appear in the order w_1, b_1, w_2, b_2; the gradient halves should be
# compared with layer1.w_gradients, layer1.b_gradients, layer2.w_gradients, layer2.b_gradients.
sess.run(grads_and_vars, feed_dict={input_x: X, input_y: y})

[(array([[ 9.67547342,  0.        , -1.48742055]]),
  array([[ 0.49671415, -0.1382643 ,  0.64768854]])),
 (array([[ 1.66556736,  0.        , -0.2560494 ]]), array([[0., 0., 0.]])),
 (array([[5.46551348],
         [0.        ],
         [7.12673559]]), array([[ 0.8793217 ],
         [-0.13518851],
         [-0.13517904]])),
 (array([[1.89415019]]), array([[0.]]))]

### 可以看出，TensorFlow 计算出的损失值以及各参数（w_1、b_1、w_2、b_2）的梯度与上面手写框架的结果一致，说明在本例中神经网络框架的前向及反向实现是正确的