# TensorFlow workshop
## Basics
### Defining the graph

In [1]:
import tensorflow as tf
import numpy as np
import time
import random

In [2]:
a = tf.constant(3)
b = tf.constant(3)
c = a * b

### Evaluating graph nodes

In [3]:
# trace c, c.eval() here

In [5]:
sess = tf.Session()

In [5]:
c.eval(session=sess)

9

In [6]:
sess.run(c)

9

In [7]:
with sess.as_default():
    print c.eval()

9


In [8]:
sess.run(a)

3

## Variables

In [11]:
a = tf.Variable(initial_value=3)
b = tf.Variable(initial_value=5)
c = a * b
sess.run(c, feed_dict={a: 10, b: 12})

120

In [12]:
a = tf.Variable(initial_value=3)
update = tf.assign(a, a + tf.constant(1))
# try to sess.run(update) here

In [13]:
sess.run(tf.initialize_all_variables())
sess.run(update)
sess.run(a)

4

In [14]:
sess.run(update)
sess.run(a)

5

In [15]:
print sess.run(a + a, feed_dict={a: 10})
print sess.run(a)

20
5


In [16]:
a = tf.placeholder(tf.float32)
# try running sess.run(a) here

In [17]:
sess.run(a + a, feed_dict={a: 10})

20.0

## Tensors

Tensors are a generalization of matrices and vectors to more than two dimensions. A vector is a 1-dimensional tensor, a matrix is a 2-dimensional tensor.

In [16]:
a = tf.constant(np.array([[1,2],[3,4]]))
b = tf.constant([[1,2],[3,4]])

In [34]:
sess.run(a)

array([[1, 2],
       [3, 4]])

In [17]:
sess.run(b)

array([[1, 2],
       [3, 4]], dtype=int32)

In [19]:
a = tf.constant(np.array([[1,2],[3,4]]), dtype=np.float64)
b = tf.constant([[1,2],[3,4]], dtype=np.float64)
sess.run(b)

array([[ 1.,  2.],
       [ 3.,  4.]])

Most of the things you can do with numpy on arrays you can do with tensorflow on tensors:

In [23]:
tf.pow(a, 2).eval(session=sess)

array([[  1.,   4.],
       [  9.,  16.]])

In [22]:
(a + a).eval(session=sess)

array([[ 2.,  4.],
       [ 6.,  8.]])

In [21]:
(a * a).eval(session=sess)

array([[  1.,   4.],
       [  9.,  16.]])

In [20]:
tf.matmul(a, a).eval(session=sess)

array([[  7.,  10.],
       [ 15.,  22.]])

### Running on different devices

In [9]:
with tf.device("/cpu:0"):
    a = tf.constant(np.zeros((3000, 3000)))
    b = tf.constant(np.zeros((3000, 3000)))
    c = tf.matmul(a, b)
    print c
    started = time.time()
    print sess.run(c)
    print time.time() - started

Tensor("MatMul:0", shape=(3000, 3000), dtype=float64, device=/device:CPU:0)
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
3.78189301491


In [10]:
with tf.device("/gpu:0"):
    a = tf.constant(np.zeros((3000, 3000)))
    b = tf.constant(np.zeros((3000, 3000)))
    c = tf.matmul(a, b)
    print c
    started = time.time()
    print sess.run(c)
    print time.time() - started

Tensor("MatMul_1:0", shape=(3000, 3000), dtype=float64, device=/device:GPU:0)
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
6.80619692802


## Examples

In [18]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [19]:
input = tf.placeholder(tf.float32, [None, 784])
label = tf.placeholder(tf.float32, [None, 10])

In [20]:
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

logits = tf.matmul(input, W) + b
output = tf.nn.softmax(logits)

In [21]:
loss = tf.reduce_mean(-tf.reduce_sum(label * tf.log(output), reduction_indices=[1]))
correct_samples = tf.reduce_sum(tf.reduce_sum(label * output))

In [22]:
    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    sess.run(tf.initialize_all_variables())

    for i in range(1000):
        batch_input, batch_label = mnist.train.next_batch(128)
        sess.run(train_step, feed_dict={input: batch_input, label: batch_label})
        if i % 200 == 0:
            print i, sess.run(correct_samples, feed_dict={input: batch_input, label: batch_label}) / 128.0

    num_test_samples = mnist.test.images.shape[0]
    print "FINAL", sess.run(correct_samples, feed_dict={input: mnist.test.images, label: mnist.test.labels}) / num_test_samples

0 0.121784970164
200 0.641116380692
400 0.740849196911
600 0.764590203762
800 0.804321885109
FINAL 0.82025859375


In [23]:
def train(output, optimizer="sgd", epochs=1000):
    loss = tf.reduce_mean(-tf.reduce_sum(label * tf.log(output), reduction_indices=[1]))
    correct_samples = tf.reduce_sum(tf.reduce_sum(label * output))

    if optimizer == "sgd":
        train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
    elif optimizer == "adam":
        train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
    else:
        assert False, optimizer

    sess.run(tf.initialize_all_variables())

    for i in range(epochs):
        batch_input, batch_label = mnist.train.next_batch(128)
        sess.run(train_step, feed_dict={input: batch_input, label: batch_label})
        if i % 200 == 0:
            print i, sess.run(correct_samples, feed_dict={input: batch_input, label: batch_label}) / 128.0

    num_test_samples = mnist.test.images.shape[0]
    print "FINAL", sess.run(correct_samples, feed_dict={input: mnist.test.images, label: mnist.test.labels}) / num_test_samples

In [24]:
W1 = tf.Variable(tf.zeros([784, 200]))
b1 = tf.Variable(tf.zeros([200]))

W2 = tf.Variable(tf.zeros([200, 10]))
b2 = tf.Variable(tf.zeros([10]))

layer1 = tf.matmul(input, W1) + b1
activation1 = tf.nn.sigmoid(layer1)
logits = tf.matmul(activation1, W2) + b2
output = tf.nn.softmax(logits)

In [25]:
train(output)

0 0.106626734138
200 0.102996751666
400 0.101929455996
600 0.101439289749
800 0.104079857469
FINAL 0.102699694824


In [26]:
W1 = tf.Variable(tf.truncated_normal(shape=[784, 200], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[200]))

W2 = tf.Variable(tf.truncated_normal(shape=[200, 10], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[10]))

layer1 = tf.matmul(input, W1) + b1
activation1 = tf.nn.sigmoid(layer1)
logits = tf.matmul(activation1, W2) + b2
output = tf.nn.softmax(logits)

In [27]:
train(output)

0 0.0995512455702
200 0.551346421242
400 0.656971812248
600 0.719747543335
800 0.776132881641
FINAL 0.793150488281


### Convolutional networks

In [28]:
def convolution(flow, kernel_size, in_maps, out_maps):
    W = tf.Variable(tf.truncated_normal(shape=[kernel_size, kernel_size, in_maps, out_maps], stddev=0.1))
    b = tf.Variable(tf.constant(0.1, shape=[out_maps]))
    return tf.nn.conv2d(flow, W, strides=[1, 1, 1, 1], padding='SAME') + b

def pooling(flow, kernel_size):

     return tf.nn.max_pool(flow, ksize=[1, kernel_size, kernel_size, 1], strides=[1, kernel_size, kernel_size, 1], padding='SAME')
    

In [29]:
# I have to use with tf.device("/cpu:0"): on the laptop I use for this presentation due to misconfigured cudnn
# Feel free to remove it or replace with /gpu:0 when run on a GPU-enabled machine
with tf.device("/cpu:0"):
    flow = input

    flow = tf.reshape(flow, [-1,28,28,1])

    flow = convolution(flow, 5, 1, 32)
    flow = tf.nn.relu(flow)
    flow = pooling(flow, 2)

    flow = convolution(flow, 5, 32, 32)
    flow = tf.nn.relu(flow)
    flow = pooling(flow, 2)

    flow = tf.reshape(flow, [-1, 7 * 7 * 32])


In [30]:
W1 = tf.Variable(tf.truncated_normal(shape=[7 * 7 * 32, 200], stddev=0.1))
b1 = tf.Variable(tf.constant(0.1, shape=[200]))

W2 = tf.Variable(tf.truncated_normal(shape=[200, 10], stddev=0.1))
b2 = tf.Variable(tf.constant(0.1, shape=[10]))

""" note the input to matmul is now flow, not input """
layer1 = tf.matmul(flow, W1) + b1
activation1 = tf.nn.relu(layer1)
logits = tf.matmul(activation1, W2) + b2
output = tf.nn.softmax(logits)

In [31]:
with tf.device("/cpu:0"):
    train(output, optimizer="adam", epochs=2000)

0 0.0930055379868
200 0.779168963432
400 0.849575698376
600 0.894972443581
800 0.919608831406
1000 0.886497139931
1200 0.93567109108
1400 0.968767404556
1600 0.942642331123
1800 0.939561843872
FINAL 0.956352050781


## Sequence to sequence

In [115]:
def gen_sample():
    inp = [random.randint(0, 1) for _ in range(10)]
    if inp[0] == 1:
        out = [1 - x for x in inp]
    else:
        out = [x for x in reversed(inp)]
    return inp, out
        
train_samples = 12800
test_samples = 512
samples = [gen_sample() for _ in range(train_samples + test_samples)]

input_data = [np.array(x[0]) for x in samples[:train_samples]]
label_data = [np.array(x[1]) for x in samples[:train_samples]]

input_test = [np.array(x[0]) for x in samples[train_samples:]]
label_test = [np.array(x[1]) for x in samples[train_samples:]]

samples[:10]
    

[([0, 0, 1, 1, 1, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 1, 1, 1, 0, 0]),
 ([1, 1, 0, 1, 0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 1, 1, 0, 1, 1, 1]),
 ([1, 1, 1, 0, 1, 1, 1, 1, 1, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 1]),
 ([1, 0, 1, 0, 1, 0, 0, 1, 1, 1], [0, 1, 0, 1, 0, 1, 1, 0, 0, 0]),
 ([0, 0, 0, 1, 1, 1, 0, 1, 1, 0], [0, 1, 1, 0, 1, 1, 1, 0, 0, 0]),
 ([0, 1, 1, 0, 0, 0, 1, 1, 1, 0], [0, 1, 1, 1, 0, 0, 0, 1, 1, 0]),
 ([0, 1, 1, 0, 1, 1, 1, 1, 0, 0], [0, 0, 1, 1, 1, 1, 0, 1, 1, 0]),
 ([1, 0, 0, 0, 1, 1, 1, 0, 0, 0], [0, 1, 1, 1, 0, 0, 0, 1, 1, 1]),
 ([1, 1, 0, 1, 0, 1, 0, 1, 1, 1], [0, 0, 1, 0, 1, 0, 1, 0, 0, 0]),
 ([0, 1, 0, 1, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 1, 0, 1, 0])]

In [117]:
inputs = [tf.placeholder(tf.int32, shape=(None,)) for x in range(10)]
labels = [tf.placeholder(tf.int32, shape=(None,)) for x in range(10)]
weights = [tf.constant(1.0) for x in range(10)]

outputs = labels[-1:] + labels[:-1]

# this way we can define a single LSTM cell:
single_cell = tf.nn.rnn_cell.BasicLSTMCell(128, state_is_tuple=True) # state_is_tuple is to disable a warning

# instead, we will use two layer GRU network
cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.GRUCell(128) for x in range(2)])

outputs, states = tf.nn.seq2seq.embedding_rnn_seq2seq(inputs, outputs, cell, 2, 2, 128, scope="k")

loss = tf.nn.seq2seq.sequence_loss(outputs, labels, weights)

train_step = tf.train.AdamOptimizer(0.01).minimize(loss)

sess.run(tf.initialize_all_variables())

In [118]:
def get_feed_dict(input_data, label_data, fr, to):
    batch_input = input_data[fr : to]
    batch_input_t = [ [x[i] for x in batch_input] for i in range(10)]
    batch_label = label_data[fr : to]
    batch_label_t = [ [x[i] for x in batch_label] for i in range(10)]
    return dict(zip(inputs + labels, batch_input_t + batch_label_t))


In [119]:
for epoch in range(5):
    for batch_start in range(0, len(input_data), 128):
        #print batch_start
        feed_dict = get_feed_dict(input_data, label_data, batch_start, batch_start + 128)
        sess.run(train_step, feed_dict=feed_dict)
    print sess.run(loss, feed_dict=get_feed_dict(input_test, label_test, 0, len(input_test)))

0.152143
0.0379232
0.0132509
0.0011804
0.00599503


In [122]:
test_dict = get_feed_dict(input_test, label_test, 0, 20)
computed = [np.argmax(sess.run(output, feed_dict=test_dict), axis=1) for output in outputs]
result = [np.array([x[i] for x in computed]) for i in range(20)]

for i, o, l in zip(input_test, result, label_test[:20]):
    print i
    print o
    print l
    assert all(o == l)
    print ""


[0 0 1 0 0 1 1 1 1 0]
[0 1 1 1 1 0 0 1 0 0]
[0 1 1 1 1 0 0 1 0 0]

[1 1 0 1 0 1 1 0 0 0]
[0 0 1 0 1 0 0 1 1 1]
[0 0 1 0 1 0 0 1 1 1]

[0 1 1 0 1 1 1 1 1 1]
[1 1 1 1 1 1 0 1 1 0]
[1 1 1 1 1 1 0 1 1 0]

[1 1 1 1 1 1 1 1 1 0]
[0 0 0 0 0 0 0 0 0 1]
[0 0 0 0 0 0 0 0 0 1]

[0 1 0 1 1 0 0 1 1 1]
[1 1 1 0 0 1 1 0 1 0]
[1 1 1 0 0 1 1 0 1 0]

[0 0 0 0 0 0 0 0 1 0]
[0 1 0 0 0 0 0 0 0 0]
[0 1 0 0 0 0 0 0 0 0]

[1 0 1 0 1 1 0 1 1 1]
[0 1 0 1 0 0 1 0 0 0]
[0 1 0 1 0 0 1 0 0 0]

[0 1 0 0 0 0 0 0 1 1]
[1 1 0 0 0 0 0 0 1 0]
[1 1 0 0 0 0 0 0 1 0]

[1 0 1 0 1 1 1 0 1 0]
[0 1 0 1 0 0 0 1 0 1]
[0 1 0 1 0 0 0 1 0 1]

[0 1 0 0 1 1 0 0 1 1]
[1 1 0 0 1 1 0 0 1 0]
[1 1 0 0 1 1 0 0 1 0]

[1 1 0 0 0 0 1 0 1 0]
[0 0 1 1 1 1 0 1 0 1]
[0 0 1 1 1 1 0 1 0 1]

[1 1 1 1 1 0 0 0 0 1]
[0 0 0 0 0 1 1 1 1 0]
[0 0 0 0 0 1 1 1 1 0]

[1 1 1 1 0 0 1 0 0 1]
[0 0 0 0 1 1 0 1 1 0]
[0 0 0 0 1 1 0 1 1 0]

[1 0 0 1 1 1 0 1 1 1]
[0 1 1 0 0 0 1 0 0 0]
[0 1 1 0 0 0 1 0 0 0]

[0 1 0 1 0 0 1 1 1 0]
[0 1 1 1 0 0 1 0 1 0]
[0 1 1 1 0 0 1 0 1