## Example: Train a two-layer ReLU network on random data with L2 loss

In [1]:
import tensorflow as tf
import numpy as np

In [2]:
# Problem sizes: N examples per batch, D input/output features, H hidden units.
N, D, H = 64, 1000, 100

# Inputs, targets and both weight matrices are all placeholders, so their
# values must be supplied through feed_dict on every sess.run call.
x = tf.placeholder(dtype=tf.float32, shape=[N, D])
y = tf.placeholder(dtype=tf.float32, shape=[N, D])
w1 = tf.placeholder(dtype=tf.float32, shape=[D, H])
w2 = tf.placeholder(dtype=tf.float32, shape=[H, D])

# Forward pass: linear layer -> ReLU (element-wise max with 0) -> linear layer.
hidden_pre_activation = tf.matmul(x, w1)
h = tf.maximum(hidden_pre_activation, 0)
y_pred = tf.matmul(h, w2)
diff = tf.subtract(y_pred, y)

# L2 loss: squared error summed over the feature axis, then averaged over
# the examples in the batch.
squared_error_per_example = tf.reduce_sum(tf.pow(diff, 2), axis=1)
loss = tf.reduce_mean(squared_error_per_example)

In [3]:
# Symbolic gradients of the scalar loss with respect to both weight placeholders.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Local, seeded generator so the random data and initial weights are the same
# on every run without touching NumPy's global random state.
rng = np.random.RandomState(0)

with tf.Session() as sess:
    # The weights live in NumPy arrays and are fed in on every step; the graph
    # holds no tf.Variable, so no variable initializer needs to be run.
    values = {x: rng.randn(N, D),
              w1: rng.randn(D, H),
              w2: rng.randn(H, D),
              y: rng.randn(N, D)}

    learning_rate = 1e-5
    num_steps = 5000
    log_every = 100
    # NOTE(review): the output recorded for this cell drops from ~4.9e7 to
    # ~968.7 within 100 steps and then barely moves -- the unit-variance
    # initial weights / step size may need tuning; confirm before relying on it.
    for t in range(num_steps):
        # One forward/backward pass: fetch the loss and both gradients.
        loss_val, grad_w1_val, grad_w2_val = sess.run(
            [loss, grad_w1, grad_w2], feed_dict=values)
        # Plain gradient-descent step, applied in place to the fed arrays so
        # the next iteration sees the updated weights.
        values[w1] -= learning_rate * grad_w1_val
        values[w2] -= learning_rate * grad_w2_val
        if t % log_every == 0:
            print(str(t) + "step, loss value is: " + str(loss_val))
    

0step, loss value is: 49488776.0
100step, loss value is: 968.7559
200step, loss value is: 968.75354
300step, loss value is: 968.7512
400step, loss value is: 968.7488
500step, loss value is: 968.74646
600step, loss value is: 968.74414
700step, loss value is: 968.7417
800step, loss value is: 968.7394
900step, loss value is: 968.73706
1000step, loss value is: 968.7346
1100step, loss value is: 968.7323
1200step, loss value is: 968.7299
1300step, loss value is: 968.72754
1400step, loss value is: 968.72516
1500step, loss value is: 968.7228
1600step, loss value is: 968.72046
1700step, loss value is: 968.71814
1800step, loss value is: 968.7157
1900step, loss value is: 968.7134
2000step, loss value is: 968.711
2100step, loss value is: 968.7086
2200step, loss value is: 968.7063
2300step, loss value is: 968.7039
2400step, loss value is: 968.7015
2500step, loss value is: 968.69916
2600step, loss value is: 968.6968
2700step, loss value is: 968.6944
2800step, loss value is: 968.692
2900step, loss va

## How the tf.gradients() function works

In [4]:
# Minimal tf.gradients demo: for y = 2*x the gradient of y with respect to x
# is 2 for every element of x, whatever values are fed in.
# NOTE: this rebinds the names `x` and `y` that the training example uses for
# its placeholders; re-run the graph-definition cell before re-running training.
x = tf.placeholder(tf.float32)
y = 2*x
# tf.gradients returns a list with one gradient tensor per entry in `xs`.
var_grad = tf.gradients(y, x)
with tf.Session() as sess:
    my_x = np.random.randn(3)
    # No tf.Variable is defined in this notebook, so there is nothing to
    # initialize before evaluating the gradient.
    print(sess.run(var_grad, feed_dict={x: my_x}))

[array([2., 2., 2.], dtype=float32)]
