# Series 8: Neural Networks feat. TensorFlow

In [1]:
import tensorflow as tf
import numpy as np
import datetime

## Exercise Session Notes (ETHZ, May 6 2016)

 * Recommended: Udacity Deep Learning (uses TensorFlow)
 * No one library to rule them all
   * Configuration file-based: Caffe, DistBelief
   * Programmatic generation: Torch (Lua), Theano (Python), **TensorFlow** (Python)
 * Historical HPC progression: Fortran `->` C++ `->` GPU (CUDA, OpenCL)
 * TensorFlow was chosen for having better support for distributed computation (close call!)
 * TensorFlow is much slower than e.g. Torch or Theano (as of May 2016)


In [2]:
# A very simple example of the TensorFlow system.

input1 = tf.placeholder(tf.float32, shape=(1))
input2 = tf.placeholder(tf.float32, shape=(1))
output = tf.mul(input1, input2)

with tf.Session() as sess:
    print(sess.run([output], feed_dict={input1: [2], input2: [31]}))

[array([ 62.], dtype=float32)]


## Problem 1 (Getting Started with TensorFlow)

Based on the code skeleton provided by our kind TAs.
Note: TF is not designed to handle multiple pipelines in a single
file without proper modularization (or interactive sessions). Because of this, it's indicated
to just run one of the problems at a time, since otherwise you might
stumble upon some weird errors.

### 1.1 Multi-valued linear regression

In [3]:
# create some noisy data
TRAIN_EXAMPLES = 100000
INPUT_DIMENSION = 2
OUTPUT_DIMENSION = 3
x_data = np.random.rand(TRAIN_EXAMPLES, INPUT_DIMENSION).astype(np.float32)

# correct_W = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15]]
correct_W = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float32)
correct_b = np.array([11, 12, 13], dtype=np.float32)

noise_level = 0.01
y_data = np.dot(x_data, correct_W) + correct_b + np.random.normal(size=(TRAIN_EXAMPLES, OUTPUT_DIMENSION))

In [4]:
# It's better to work with a single large cell, in order
# to avoid stale tensors and stuff.

def problem_11():

    # define the symbolic variables
    W = tf.Variable(tf.random_uniform(correct_W.shape, -1.0, 1.0))
    b = tf.Variable(tf.zeros(correct_b.shape))

    # define the linear model
    # y_hat is symbolic!
    # 'x_data' is fixed (our data, duh!).
    y_hat = tf.matmul(x_data, W) + b

    # define the loss
    loss = tf.reduce_mean(tf.square(y_hat - y_data))
    tf.scalar_summary('log loss', tf.log(1.0 + loss))

    # define the optimizer
    step_size = 0.1
    optimizer = tf.train.GradientDescentOptimizer(step_size)
    train_op = optimizer.minimize(loss)

    # initialize the tensorflow session
    init = tf.initialize_all_variables()
    iterations = 500

    with tf.Session() as sess:
        sess.run(init)

        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter("train/ex1_{}".format(datetime.datetime.now().strftime("%s")), sess.graph)

        # call the train_op many times, each time it will update the variables W and b according to their gradients
        for step in range(1, iterations + 1):
            _, loss_value, summary_str = sess.run([train_op, loss, summary_op])
            summary_writer.add_summary(summary_str, step)
            if step % 100 == 0:
                print("iteration:", step, "loss:", loss_value)

        print("learned W:\n{}".format(sess.run(W)))
        print("learned b:\n{}".format(sess.run(b)))

### 1.2 Batching and TensorBoard

In [5]:
def problem_12():
    # define the symbolic variables
    W = tf.Variable(tf.random_uniform(correct_W.shape, -1.0, 1.0))
    b = tf.Variable(tf.zeros(correct_b.shape))
    
    # define the data placeholders
    batch_size = 10
    x_ph = tf.placeholder(tf.float32, shape=(batch_size, INPUT_DIMENSION))
    y_ph = tf.placeholder(tf.float32, shape=(batch_size, OUTPUT_DIMENSION))

    # define the model (using placeholders)
    y_hat_batch = tf.matmul(x_ph, W) + b

    # define the (stochastic!) loss
    loss_batch = tf.reduce_mean(tf.square(y_hat_batch - y_ph))
    tf.scalar_summary('log loss', tf.log(1.0 + loss_batch))  # attention: this is the stochastic loss, i.e. it will be noisy

    # define the optimizer
    step_size = 0.1
    optimizer = tf.train.GradientDescentOptimizer(step_size)
    train_op = optimizer.minimize(loss_batch)

    # initialize the tensorflow session
    init = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init)

        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter("train/ex2_{}".format(datetime.datetime.now().strftime("%s")), sess.graph)

    # call the train_op many times, each time it will update the variables W and b according to their gradients
        for step in range(5001):
            # determine the minibatch
            start_index = (batch_size * step) % x_data.shape[0]
            stop_index = start_index + batch_size

            # get the minibatch data
            x_minibatch = x_data[start_index:stop_index]
            y_minibatch = y_data[start_index:stop_index]

            feed_dict = {
                    x_ph: x_minibatch,
                    y_ph: y_minibatch
                    }

            _, loss_value, summary_str = sess.run([train_op, loss_batch, summary_op], feed_dict=feed_dict)
            if step % 250 == 0:
                summary_writer.add_summary(summary_str, step)
                print("iteration:", step, "loss:", loss_value)

        print("learned W:\n{}".format(sess.run(W)))
        print("learned b:\n{}".format(sess.run(b)))

In [6]:
problem_12()

iteration: 0 loss: 226.326
iteration: 250 loss: 1.23227
iteration: 500 loss: 0.816256
iteration: 750 loss: 0.817808
iteration: 1000 loss: 1.25341
iteration: 1250 loss: 0.767471
iteration: 1500 loss: 0.828023
iteration: 1750 loss: 0.703546
iteration: 2000 loss: 0.924839
iteration: 2250 loss: 1.85123
iteration: 2500 loss: 1.49962
iteration: 2750 loss: 0.580243
iteration: 3000 loss: 1.17276
iteration: 3250 loss: 1.46334
iteration: 3500 loss: 0.717523
iteration: 3750 loss: 0.81886
iteration: 4000 loss: 0.762826
iteration: 4250 loss: 1.01092
iteration: 4500 loss: 0.820645
iteration: 4750 loss: 0.957444
iteration: 5000 loss: 0.894558
learned W:
[[ 0.87903428  1.90676582  3.0542171 ]
 [ 3.89054012  4.98080063  6.04779243]]
learned b:
[ 10.99293709  11.94932175  13.05525112]
