In [1]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline


TensorFlow: Static Graphs
-------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation uses basic TensorFlow operations to set up a computational
graph, then executes the graph many times to actually train the network.

One of the main differences between TensorFlow and PyTorch is that TensorFlow
uses static computational graphs while PyTorch uses dynamic computational
graphs.

In TensorFlow we first set up the computational graph, then execute the same
graph many times.



In [2]:
import tensorflow as tf
import numpy as np

# First we set up the computational graph:

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Fixed seed so that restarting the kernel and re-running this cell reproduces
# the same initial weights, the same data, and therefore the same loss curve.
SEED = 0

# Create placeholders for the input and target data; these will be filled
# with real data when we execute the graph.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Create Variables for the weights and initialize them with random data.
# A TensorFlow Variable persists its value across executions of the graph.
# Each initializer gets its own op-level seed so that w1 and w2 are
# reproducible without touching any global random state.
w1 = tf.Variable(tf.random_normal((D_in, H), seed=SEED))
w2 = tf.Variable(tf.random_normal((H, D_out), seed=SEED + 1))

# Forward pass: Compute the predicted y using operations on TensorFlow Tensors.
# Note that this code does not actually perform any numeric operations; it
# merely sets up the computational graph that we will later execute.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)

# Compute loss using operations on TensorFlow Tensors
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Compute gradient of the loss with respect to w1 and w2.
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Update the weights using gradient descent. Note that in TensorFlow the act
# of updating the value of the weights is part of the computational graph; in
# PyTorch this happens outside the computational graph.
learning_rate = 1e-6
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# To actually update the weights, the assign operations must be evaluated when
# executing the graph. Grouping them into a single operation lets us trigger
# both updates with one fetch that returns no value, instead of copying the
# full updated weight matrices back into numpy arrays on every step.
update_weights = tf.group(new_w1, new_w2)

# Create numpy arrays holding the actual data for the inputs x and targets y.
# A local RandomState keeps the data reproducible without reseeding numpy's
# global generator.
rng = np.random.RandomState(SEED)
x_value = rng.randn(N, D_in)
y_value = rng.randn(N, D_out)

# Number of gradient descent steps to run.
num_steps = 500

# Now we have built our computational graph, so we enter a TensorFlow session to
# actually execute the graph.
with tf.Session() as sess:
    # Run the graph once to initialize the Variables w1 and w2.
    sess.run(tf.global_variables_initializer())

    # The same data is bound to the placeholders on every step, so build the
    # feed dictionary once.
    feed = {x: x_value, y: y_value}

    for step in range(num_steps):
        # Execute the graph many times. Each time it executes we bind x_value
        # to x and y_value to y via the feed_dict argument, compute the loss,
        # and apply one gradient descent update to w1 and w2. The loss comes
        # back as a numpy value; the grouped update operation returns None.
        loss_value, _ = sess.run([loss, update_weights], feed_dict=feed)
        print(loss_value)

  from ._conv import register_converters as _register_converters


37150330.0
35839652.0
35671600.0
30922864.0
21490744.0
12222178.0
6287355.0
3364257.5
2046526.2
1426296.1
1093109.6
884464.4
736646.06
623493.0
533146.5
459286.56
398111.3
346897.75
303665.6
266918.6
235553.08
208599.06
185312.06
165112.97
147531.58
132156.95
118692.445
106842.516
96378.836
87133.5
78934.16
71636.66
65132.355
59322.098
54115.047
49444.883
45243.72
41454.773
38034.14
34939.35
32133.047
29584.607
27268.559
25162.453
23247.984
21500.14
19901.766
18438.525
17099.383
15871.17
14743.01
13705.85
12750.848
11871.532
11061.183
10313.422
9622.469
8983.492
8392.522
7846.0996
7339.205
6868.945
6432.494
6027.0986
5650.596
5300.0835
4973.841
4669.8223
4386.4614
4122.3945
3875.9656
3645.8235
3430.8164
3229.976
3042.3955
2866.9966
2702.702
2548.7827
2404.6155
2269.6335
2143.0586
2024.2738
1912.8217
1808.1282
1709.749
1617.2769
1530.3069
1448.4907
1371.4744
1298.9791
1230.7698
1166.4623
1105.8154
1048.6389
994.7019
943.8037
895.75085
850.4005
807.5541
767.0704
728.7974
692.61
658.39355