## Example: Train a two-layer ReLU network on random data with L2 loss

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Sizes: N rows of data, D input/output features, H hidden units.
N, D, H = 64, 1000, 100

# Data, targets and both weight matrices are placeholders, so their values
# have to be supplied through feed_dict on every sess.run() call.
data_shape = (N, D)
x = tf.placeholder(dtype=tf.float32, shape=data_shape)
y = tf.placeholder(dtype=tf.float32, shape=data_shape)
w1 = tf.placeholder(dtype=tf.float32, shape=(D, H))
w2 = tf.placeholder(dtype=tf.float32, shape=(H, D))

# Forward pass: linear layer -> ReLU (element-wise max with 0) -> linear layer.
hidden_pre = tf.matmul(x, w1)
h = tf.maximum(hidden_pre, 0)
y_pred = tf.matmul(h, w2)
diff = y_pred - y

# L2 loss: squared error summed along axis 1 (per row), then averaged over rows.
squared_err = diff**2
row_loss = tf.reduce_sum(squared_err, axis=1)
loss = tf.reduce_mean(row_loss)

In [7]:
# Symbolic gradients of the loss with respect to both weight matrices.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Dedicated, seeded generator so the random data and the initial weights are
# identical on every run, without reseeding NumPy's global random state.
data_rng = np.random.RandomState(0)

with tf.Session() as sess:
    # x and y never change during training, so they are cast to the
    # placeholders' float32 dtype once here instead of being converted from
    # float64 on every sess.run() call. w1 and w2 stay float64 so the small
    # per-step updates below accumulate at full precision; they are converted
    # to float32 when fed.
    # NOTE(review): the weights are drawn from an unscaled standard normal. In
    # the run recorded with this notebook the loss started near 4.7e7 and then
    # stalled near 966.8, so a scaled initialization may be worth trying --
    # TODO confirm.
    values = {x: data_rng.randn(N, D).astype(np.float32),
              w1: data_rng.randn(D, H),
              w2: data_rng.randn(H, D),
              y: data_rng.randn(N, D).astype(np.float32)}
    # No variable initializer is run: the loss is built solely from
    # placeholders, whose values all come from `values`.

    learning_rate = 1e-5
    for t in range(5000):
        loss_val, grad_w1_val, grad_w2_val = sess.run(
            [loss, grad_w1, grad_w2], feed_dict=values)
        # Manual gradient-descent step on the NumPy copies of the weights;
        # the updated arrays are fed back in on the next iteration.
        values[w1] -= learning_rate * grad_w1_val
        values[w2] -= learning_rate * grad_w2_val
        if t % 100 == 0:
            print(str(t) + "step, loss value is: " + str(loss_val))
    

0step, loss value is: 47480790.0
100step, loss value is: 966.79553
200step, loss value is: 966.7927
300step, loss value is: 966.7901
400step, loss value is: 966.7875
500step, loss value is: 966.78485
600step, loss value is: 966.7822
700step, loss value is: 966.7796
800step, loss value is: 966.777
900step, loss value is: 966.77435
1000step, loss value is: 966.7717
1100step, loss value is: 966.7691
1200step, loss value is: 966.7665
1300step, loss value is: 966.76385
1400step, loss value is: 966.7612
1500step, loss value is: 966.7586
1600step, loss value is: 966.756
1700step, loss value is: 966.75336
1800step, loss value is: 966.75073
1900step, loss value is: 966.74805
2000step, loss value is: 966.7455
2100step, loss value is: 966.7428
2200step, loss value is: 966.74023
2300step, loss value is: 966.73755
2400step, loss value is: 966.735
2500step, loss value is: 966.7323
2600step, loss value is: 966.7297
2700step, loss value is: 966.72705
2800step, loss value is: 966.7244
2900step, loss va

## How the tf.gradients() function works

In [12]:
# Minimal tf.gradients() demo: y = 2 * x, so dy/dx is 2 for every element.
# The demo tensors get their own names so this cell does not rebind the `x`
# and `y` placeholders that the training cell's feed dict is keyed on.
x_demo = tf.placeholder(tf.float32)
y_demo = 2 * x_demo
# tf.gradients() returns a list with one gradient tensor per tensor in xs.
var_grad = tf.gradients(y_demo, x_demo)
with tf.Session() as sess:
    my_x = np.random.randn(3)
    # No variable initializer is run: the demo graph above contains only a
    # placeholder and ops derived from it.
    print(sess.run(var_grad, feed_dict={x_demo: my_x}))

[ 0.33668616  2.1784432  -2.4673347 ]
[<tf.Tensor 'gradients_7/mul_4_grad/Reshape_1:0' shape=<unknown> dtype=float32>]
